diff --git "a/rpi5/result.json" "b/rpi5/result.json" new file mode 100644--- /dev/null +++ "b/rpi5/result.json" @@ -0,0 +1,204686 @@ +{ + "timestamp_utc": "2025-12-08T20:03:22.551221+00:00", + "bench_binary": "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "system": { + "hostname": "raspberrypi", + "platform": "Linux-6.12.34+rpt-rpi-2712-aarch64-with-glibc2.36", + "python": "3.11.2", + "cpu_count": 4, + "cpu_info": { + "lscpu": [ + { + "field": "Architecture:", + "data": "aarch64" + }, + { + "field": "CPU op-mode(s):", + "data": "32-bit, 64-bit" + }, + { + "field": "Byte Order:", + "data": "Little Endian" + }, + { + "field": "CPU(s):", + "data": "4" + }, + { + "field": "On-line CPU(s) list:", + "data": "0-3" + }, + { + "field": "Vendor ID:", + "data": "ARM" + }, + { + "field": "Model name:", + "data": "Cortex-A76" + }, + { + "field": "Model:", + "data": "1" + }, + { + "field": "Thread(s) per core:", + "data": "1" + }, + { + "field": "Core(s) per cluster:", + "data": "4" + }, + { + "field": "Socket(s):", + "data": "-" + }, + { + "field": "Cluster(s):", + "data": "1" + }, + { + "field": "Stepping:", + "data": "r4p1" + }, + { + "field": "CPU(s) scaling MHz:", + "data": "100%" + }, + { + "field": "CPU max MHz:", + "data": "2400.0000" + }, + { + "field": "CPU min MHz:", + "data": "1500.0000" + }, + { + "field": "BogoMIPS:", + "data": "108.00" + }, + { + "field": "Flags:", + "data": "fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm lrcpc dcpop asimddp" + }, + { + "field": "L1d cache:", + "data": "256 KiB (4 instances)" + }, + { + "field": "L1i cache:", + "data": "256 KiB (4 instances)" + }, + { + "field": "L2 cache:", + "data": "2 MiB (4 instances)" + }, + { + "field": "L3 cache:", + "data": "2 MiB (1 instance)" + }, + { + "field": "NUMA node(s):", + "data": "8" + }, + { + "field": "NUMA node0 CPU(s):", + "data": "0-3" + }, + { + "field": "NUMA node1 CPU(s):", + "data": "0-3" + }, + { + "field": "NUMA node2 CPU(s):", + "data": "0-3" + }, + { + "field": "NUMA node3 CPU(s):", + "data": "0-3" + }, + { + "field": "NUMA node4 CPU(s):", + "data": "0-3" + }, + { + "field": "NUMA node5 CPU(s):", + "data": "0-3" + }, + { + "field": "NUMA node6 CPU(s):", + "data": "0-3" + }, + { + "field": "NUMA node7 CPU(s):", + "data": "0-3" + }, + { + "field": "Vulnerability Gather data sampling:", + "data": "Not affected" + }, + { + "field": "Vulnerability Indirect target selection:", + "data": "Not affected" + }, + { + "field": "Vulnerability Itlb multihit:", + "data": "Not affected" + }, + { + "field": "Vulnerability L1tf:", + "data": "Not affected" + }, + { + "field": "Vulnerability Mds:", + "data": "Not affected" + }, + { + "field": "Vulnerability Meltdown:", + "data": "Not affected" + }, + { + "field": "Vulnerability Mmio stale data:", + "data": "Not affected" + }, + { + "field": "Vulnerability Reg file data sampling:", + "data": "Not affected" + }, + { + "field": "Vulnerability Retbleed:", + "data": "Not affected" + }, + { + "field": "Vulnerability Spec rstack overflow:", + "data": "Not affected" + }, + { + "field": "Vulnerability Spec store bypass:", + "data": "Mitigation; Speculative Store Bypass disabled via prctl" + }, + { + "field": "Vulnerability Spectre v1:", + "data": "Mitigation; __user pointer sanitization" + }, + { + "field": "Vulnerability Spectre v2:", + "data": "Mitigation; CSV2, BHB" + }, + { + "field": "Vulnerability Srbds:", + "data": "Not affected" + }, + { + "field": "Vulnerability Tsx async abort:", + "data": "Not affected" + } + ] + }, + "total_ram_bytes": 8454881280 + }, + "runs": [ + { + "timestamp_utc": "2025-12-08T20:24:43.504957+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:24:24Z\",\n \"avg_ns\": 1708050119,\n \"stddev_ns\": 3793891,\n \"avg_ts\": 74.939499,\n \"stddev_ts\": 0.166238,\n \"samples_ns\": [ 1706180773, 1705553894, 1712415691 ],\n \"samples_ts\": [ 75.0214, 75.0489, 74.7482 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:24:30Z\",\n \"avg_ns\": 4162604391,\n \"stddev_ns\": 6687761,\n \"avg_ts\": 30.750032,\n \"stddev_ts\": 0.049373,\n \"samples_ns\": [ 4170023269, 4160751385, 4157038519 ],\n \"samples_ts\": [ 30.6953, 30.7637, 30.7912 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T20:24:24Z", + "avg_ns": 1708050119, + "stddev_ns": 3793891, + "avg_ts": 74.939499, + "stddev_ts": 0.166238, + "samples_ns": [ + 1706180773, + 1705553894, + 1712415691 + ], + "samples_ts": [ + 75.0214, + 75.0489, + 74.7482 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T20:24:30Z", + "avg_ns": 4162604391, + "stddev_ns": 6687761, + "avg_ts": 30.750032, + "stddev_ts": 0.049373, + "samples_ns": [ + 4170023269, + 4160751385, + 4157038519 + ], + "samples_ts": [ + 30.6953, + 30.7637, + 30.7912 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 0 + }, + { + "timestamp_utc": "2025-12-08T20:25:42.511612+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:24:44Z\",\n \"avg_ns\": 1705299723,\n \"stddev_ns\": 350191,\n \"avg_ts\": 75.060121,\n \"stddev_ts\": 0.015197,\n \"samples_ns\": [ 1705691007, 1705037775, 1705170389 ],\n \"samples_ts\": [ 75.0429, 75.0717, 75.0658 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:24:50Z\",\n \"avg_ns\": 17137617759,\n \"stddev_ns\": 6337087,\n \"avg_ts\": 29.875800,\n \"stddev_ts\": 0.011046,\n \"samples_ns\": [ 17130782920, 17138775695, 17143294663 ],\n \"samples_ts\": [ 29.8877, 29.8738, 29.8659 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T20:24:44Z", + "avg_ns": 1705299723, + "stddev_ns": 350191, + "avg_ts": 75.060121, + "stddev_ts": 0.015197, + "samples_ns": [ + 1705691007, + 1705037775, + 1705170389 + ], + "samples_ts": [ + 75.0429, + 75.0717, + 75.0658 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T20:24:50Z", + "avg_ns": 17137617759, + "stddev_ns": 6337087, + "avg_ts": 29.8758, + "stddev_ts": 0.011046, + "samples_ns": [ + 17130782920, + 17138775695, + 17143294663 + ], + "samples_ts": [ + 29.8877, + 29.8738, + 29.8659 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1 + }, + { + "timestamp_utc": "2025-12-08T20:26:23.728240+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:25:43Z\",\n \"avg_ns\": 7001476404,\n \"stddev_ns\": 200819,\n \"avg_ts\": 73.127434,\n \"stddev_ts\": 0.002097,\n \"samples_ns\": [ 7001458021, 7001285408, 7001685783 ],\n \"samples_ts\": [ 73.1276, 73.1294, 73.1252 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:26:11Z\",\n \"avg_ns\": 4146525296,\n \"stddev_ns\": 1496355,\n \"avg_ts\": 30.869222,\n \"stddev_ts\": 0.011137,\n \"samples_ns\": [ 4148253085, 4145673142, 4145649661 ],\n \"samples_ts\": [ 30.8564, 30.8756, 30.8757 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T20:25:43Z", + "avg_ns": 7001476404, + "stddev_ns": 200819, + "avg_ts": 73.127434, + "stddev_ts": 0.002097, + "samples_ns": [ + 7001458021, + 7001285408, + 7001685783 + ], + "samples_ts": [ + 73.1276, + 73.1294, + 73.1252 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T20:26:11Z", + "avg_ns": 4146525296, + "stddev_ns": 1496355, + "avg_ts": 30.869222, + "stddev_ts": 0.011137, + "samples_ns": [ + 4148253085, + 4145673142, + 4145649661 + ], + "samples_ts": [ + 30.8564, + 30.8756, + 30.8757 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 2 + }, + { + "timestamp_utc": "2025-12-08T20:27:43.734241+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:26:24Z\",\n \"avg_ns\": 7010795962,\n \"stddev_ns\": 455739,\n \"avg_ts\": 73.030224,\n \"stddev_ts\": 0.004747,\n \"samples_ns\": [ 7010346197, 7011257450, 7010784239 ],\n \"samples_ts\": [ 73.0349, 73.0254, 73.0303 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:26:52Z\",\n \"avg_ns\": 17062348560,\n \"stddev_ns\": 26609810,\n \"avg_ts\": 30.007641,\n \"stddev_ts\": 0.046771,\n \"samples_ns\": [ 17055403436, 17091741613, 17039900633 ],\n \"samples_ts\": [ 30.0198, 29.956, 30.0471 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T20:26:24Z", + "avg_ns": 7010795962, + "stddev_ns": 455739, + "avg_ts": 73.030224, + "stddev_ts": 0.004747, + "samples_ns": [ + 7010346197, + 7011257450, + 7010784239 + ], + "samples_ts": [ + 73.0349, + 73.0254, + 73.0303 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T20:26:52Z", + "avg_ns": 17062348560, + "stddev_ns": 26609810, + "avg_ts": 30.007641, + "stddev_ts": 0.046771, + "samples_ns": [ + 17055403436, + 17091741613, + 17039900633 + ], + "samples_ts": [ + 30.0198, + 29.956, + 30.0471 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 3 + }, + { + "timestamp_utc": "2025-12-08T20:28:03.811762+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:27:44Z\",\n \"avg_ns\": 1703846957,\n \"stddev_ns\": 556012,\n \"avg_ts\": 75.124123,\n \"stddev_ts\": 0.024517,\n \"samples_ns\": [ 1704365322, 1703259714, 1703915835 ],\n \"samples_ts\": [ 75.1013, 75.15, 75.1211 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:27:51Z\",\n \"avg_ns\": 4163695750,\n \"stddev_ns\": 296574,\n \"avg_ts\": 30.741920,\n \"stddev_ts\": 0.002083,\n \"samples_ns\": [ 4163631960, 4164004372, 4163450920 ],\n \"samples_ts\": [ 30.7424, 30.7396, 30.7437 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T20:27:44Z", + "avg_ns": 1703846957, + "stddev_ns": 556012, + "avg_ts": 75.124123, + "stddev_ts": 0.024517, + "samples_ns": [ + 1704365322, + 1703259714, + 1703915835 + ], + "samples_ts": [ + 75.1013, + 75.15, + 75.1211 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T20:27:51Z", + "avg_ns": 4163695750, + "stddev_ns": 296574, + "avg_ts": 30.74192, + "stddev_ts": 0.002083, + "samples_ns": [ + 4163631960, + 4164004372, + 4163450920 + ], + "samples_ts": [ + 30.7424, + 30.7396, + 30.7437 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 4 + }, + { + "timestamp_utc": "2025-12-08T20:29:02.770664+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:28:04Z\",\n \"avg_ns\": 1703759789,\n \"stddev_ns\": 20170,\n \"avg_ts\": 75.127962,\n \"stddev_ts\": 0.000889,\n \"samples_ns\": [ 1703755610, 1703781722, 1703742035 ],\n \"samples_ts\": [ 75.1281, 75.127, 75.1287 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:28:11Z\",\n \"avg_ns\": 17121109566,\n \"stddev_ns\": 9762306,\n \"avg_ts\": 29.904610,\n \"stddev_ts\": 0.017054,\n \"samples_ns\": [ 17109902545, 17125678480, 17127747675 ],\n \"samples_ts\": [ 29.9242, 29.8966, 29.893 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T20:28:04Z", + "avg_ns": 1703759789, + "stddev_ns": 20170, + "avg_ts": 75.127962, + "stddev_ts": 0.000889, + "samples_ns": [ + 1703755610, + 1703781722, + 1703742035 + ], + "samples_ts": [ + 75.1281, + 75.127, + 75.1287 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T20:28:11Z", + "avg_ns": 17121109566, + "stddev_ns": 9762306, + "avg_ts": 29.90461, + "stddev_ts": 0.017054, + "samples_ns": [ + 17109902545, + 17125678480, + 17127747675 + ], + "samples_ts": [ + 29.9242, + 29.8966, + 29.893 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 5 + }, + { + "timestamp_utc": "2025-12-08T20:29:43.987782+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:29:03Z\",\n \"avg_ns\": 7016673493,\n \"stddev_ns\": 1102322,\n \"avg_ts\": 72.969051,\n \"stddev_ts\": 0.011464,\n \"samples_ns\": [ 7015482082, 7016881230, 7017657167 ],\n \"samples_ts\": [ 72.9814, 72.9669, 72.9588 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:29:31Z\",\n \"avg_ns\": 4125206112,\n \"stddev_ns\": 1332113,\n \"avg_ts\": 31.028755,\n \"stddev_ts\": 0.009995,\n \"samples_ns\": [ 4124878028, 4126668441, 4124071869 ],\n \"samples_ts\": [ 31.0312, 31.0178, 31.0373 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T20:29:03Z", + "avg_ns": 7016673493, + "stddev_ns": 1102322, + "avg_ts": 72.969051, + "stddev_ts": 0.011464, + "samples_ns": [ + 7015482082, + 7016881230, + 7017657167 + ], + "samples_ts": [ + 72.9814, + 72.9669, + 72.9588 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T20:29:31Z", + "avg_ns": 4125206112, + "stddev_ns": 1332113, + "avg_ts": 31.028755, + "stddev_ts": 0.009995, + "samples_ns": [ + 4124878028, + 4126668441, + 4124071869 + ], + "samples_ts": [ + 31.0312, + 31.0178, + 31.0373 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 6 + }, + { + "timestamp_utc": "2025-12-08T20:31:03.952283+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:29:44Z\",\n \"avg_ns\": 7023906466,\n \"stddev_ns\": 536852,\n \"avg_ts\": 72.893909,\n \"stddev_ts\": 0.005434,\n \"samples_ns\": [ 7023512708, 7024500685, 7023706007 ],\n \"samples_ts\": [ 72.898, 72.8877, 72.896 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:30:12Z\",\n \"avg_ns\": 17030511469,\n \"stddev_ns\": 23989276,\n \"avg_ts\": 30.063729,\n \"stddev_ts\": 0.042380,\n \"samples_ns\": [ 17002959652, 17041809196, 17046765560 ],\n \"samples_ts\": [ 30.1124, 30.0438, 30.035 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T20:29:44Z", + "avg_ns": 7023906466, + "stddev_ns": 536852, + "avg_ts": 72.893909, + "stddev_ts": 0.005434, + "samples_ns": [ + 7023512708, + 7024500685, + 7023706007 + ], + "samples_ts": [ + 72.898, + 72.8877, + 72.896 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T20:30:12Z", + "avg_ns": 17030511469, + "stddev_ns": 23989276, + "avg_ts": 30.063729, + "stddev_ts": 0.04238, + "samples_ns": [ + 17002959652, + 17041809196, + 17046765560 + ], + "samples_ts": [ + 30.1124, + 30.0438, + 30.035 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 7 + }, + { + "timestamp_utc": "2025-12-08T20:31:23.911398+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:31:04Z\",\n \"avg_ns\": 1703396179,\n \"stddev_ns\": 707283,\n \"avg_ts\": 75.144007,\n \"stddev_ts\": 0.031101,\n \"samples_ns\": [ 1702603854, 1703953613, 1703631072 ],\n \"samples_ts\": [ 75.179, 75.1194, 75.1336 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:31:11Z\",\n \"avg_ns\": 4124576863,\n \"stddev_ns\": 1110120,\n \"avg_ts\": 31.033488,\n \"stddev_ts\": 0.008337,\n \"samples_ns\": [ 4125852411, 4124028406, 4123849773 ],\n \"samples_ts\": [ 31.0239, 31.0376, 31.039 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T20:31:04Z", + "avg_ns": 1703396179, + "stddev_ns": 707283, + "avg_ts": 75.144007, + "stddev_ts": 0.031101, + "samples_ns": [ + 1702603854, + 1703953613, + 1703631072 + ], + "samples_ts": [ + 75.179, + 75.1194, + 75.1336 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T20:31:11Z", + "avg_ns": 4124576863, + "stddev_ns": 1110120, + "avg_ts": 31.033488, + "stddev_ts": 0.008337, + "samples_ns": [ + 4125852411, + 4124028406, + 4123849773 + ], + "samples_ts": [ + 31.0239, + 31.0376, + 31.039 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 8 + }, + { + "timestamp_utc": "2025-12-08T20:32:22.651509+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:31:24Z\",\n \"avg_ns\": 1702907913,\n \"stddev_ns\": 269645,\n \"avg_ts\": 75.165545,\n \"stddev_ts\": 0.011621,\n \"samples_ns\": [ 1702604947, 1703037915, 1703080879 ],\n \"samples_ts\": [ 75.1789, 75.1598, 75.1579 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:31:31Z\",\n \"avg_ns\": 17049756038,\n \"stddev_ns\": 23425561,\n \"avg_ts\": 30.029793,\n \"stddev_ts\": 0.041227,\n \"samples_ns\": [ 17076706113, 17034278502, 17038283500 ],\n \"samples_ts\": [ 29.9824, 30.057, 30.05 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T20:31:24Z", + "avg_ns": 1702907913, + "stddev_ns": 269645, + "avg_ts": 75.165545, + "stddev_ts": 0.011621, + "samples_ns": [ + 1702604947, + 1703037915, + 1703080879 + ], + "samples_ts": [ + 75.1789, + 75.1598, + 75.1579 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T20:31:31Z", + "avg_ns": 17049756038, + "stddev_ns": 23425561, + "avg_ts": 30.029793, + "stddev_ts": 0.041227, + "samples_ns": [ + 17076706113, + 17034278502, + 17038283500 + ], + "samples_ts": [ + 29.9824, + 30.057, + 30.05 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 9 + }, + { + "timestamp_utc": "2025-12-08T20:33:04.981699+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:32:23Z\",\n \"avg_ns\": 7269167702,\n \"stddev_ns\": 339811,\n \"avg_ts\": 70.434474,\n \"stddev_ts\": 0.003187,\n \"samples_ns\": [ 7269380224, 7268788809, 7269334074 ],\n \"samples_ts\": [ 70.4324, 70.4381, 70.4329 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:32:52Z\",\n \"avg_ns\": 4159241036,\n \"stddev_ns\": 549880,\n \"avg_ts\": 30.774846,\n \"stddev_ts\": 0.004040,\n \"samples_ns\": [ 4158762190, 4159125158, 4159835761 ],\n \"samples_ts\": [ 30.7784, 30.7757, 30.7704 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T20:32:23Z", + "avg_ns": 7269167702, + "stddev_ns": 339811, + "avg_ts": 70.434474, + "stddev_ts": 0.003187, + "samples_ns": [ + 7269380224, + 7268788809, + 7269334074 + ], + "samples_ts": [ + 70.4324, + 70.4381, + 70.4329 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T20:32:52Z", + "avg_ns": 4159241036, + "stddev_ns": 549880, + "avg_ts": 30.774846, + "stddev_ts": 0.00404, + "samples_ns": [ + 4158762190, + 4159125158, + 4159835761 + ], + "samples_ts": [ + 30.7784, + 30.7757, + 30.7704 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 10 + }, + { + "timestamp_utc": "2025-12-08T20:34:26.378858+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:33:05Z\",\n \"avg_ns\": 7297553864,\n \"stddev_ns\": 348150,\n \"avg_ts\": 70.160496,\n \"stddev_ts\": 0.003245,\n \"samples_ns\": [ 7297408825, 7297939647, 7297313121 ],\n \"samples_ts\": [ 70.1619, 70.1568, 70.1628 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:33:34Z\",\n \"avg_ns\": 17142932744,\n \"stddev_ns\": 57568592,\n \"avg_ts\": 29.866760,\n \"stddev_ts\": 0.100491,\n \"samples_ns\": [ 17076460980, 17175681861, 17176655393 ],\n \"samples_ts\": [ 29.9828, 29.8096, 29.8079 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T20:33:05Z", + "avg_ns": 7297553864, + "stddev_ns": 348150, + "avg_ts": 70.160496, + "stddev_ts": 0.003245, + "samples_ns": [ + 7297408825, + 7297939647, + 7297313121 + ], + "samples_ts": [ + 70.1619, + 70.1568, + 70.1628 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T20:33:34Z", + "avg_ns": 17142932744, + "stddev_ns": 57568592, + "avg_ts": 29.86676, + "stddev_ts": 0.100491, + "samples_ns": [ + 17076460980, + 17175681861, + 17176655393 + ], + "samples_ts": [ + 29.9828, + 29.8096, + 29.8079 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 11 + }, + { + "timestamp_utc": "2025-12-08T20:34:46.366244+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:34:27Z\",\n \"avg_ns\": 1705057356,\n \"stddev_ns\": 394219,\n \"avg_ts\": 75.070791,\n \"stddev_ts\": 0.017358,\n \"samples_ns\": [ 1704640659, 1705424392, 1705107017 ],\n \"samples_ts\": [ 75.0891, 75.0546, 75.0686 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:34:33Z\",\n \"avg_ns\": 4131878641,\n \"stddev_ns\": 609983,\n \"avg_ts\": 30.978645,\n \"stddev_ts\": 0.004548,\n \"samples_ns\": [ 4132537002, 4131342412, 4131756510 ],\n \"samples_ts\": [ 30.9737, 30.9827, 30.9796 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T20:34:27Z", + "avg_ns": 1705057356, + "stddev_ns": 394219, + "avg_ts": 75.070791, + "stddev_ts": 0.017358, + "samples_ns": [ + 1704640659, + 1705424392, + 1705107017 + ], + "samples_ts": [ + 75.0891, + 75.0546, + 75.0686 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T20:34:33Z", + "avg_ns": 4131878641, + "stddev_ns": 609983, + "avg_ts": 30.978645, + "stddev_ts": 0.004548, + "samples_ns": [ + 4132537002, + 4131342412, + 4131756510 + ], + "samples_ts": [ + 30.9737, + 30.9827, + 30.9796 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 12 + }, + { + "timestamp_utc": "2025-12-08T20:35:45.290287+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:34:47Z\",\n \"avg_ns\": 1704141350,\n \"stddev_ns\": 83725,\n \"avg_ts\": 75.111140,\n \"stddev_ts\": 0.003690,\n \"samples_ns\": [ 1704091244, 1704238006, 1704094800 ],\n \"samples_ts\": [ 75.1133, 75.1069, 75.1132 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:34:53Z\",\n \"avg_ns\": 17111524862,\n \"stddev_ns\": 15104520,\n \"avg_ts\": 29.921370,\n \"stddev_ts\": 0.026423,\n \"samples_ns\": [ 17094120257, 17119266911, 17121187420 ],\n \"samples_ts\": [ 29.9518, 29.9078, 29.9045 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T20:34:47Z", + "avg_ns": 1704141350, + "stddev_ns": 83725, + "avg_ts": 75.11114, + "stddev_ts": 0.00369, + "samples_ns": [ + 1704091244, + 1704238006, + 1704094800 + ], + "samples_ts": [ + 75.1133, + 75.1069, + 75.1132 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T20:34:53Z", + "avg_ns": 17111524862, + "stddev_ns": 15104520, + "avg_ts": 29.92137, + "stddev_ts": 0.026423, + "samples_ns": [ + 17094120257, + 17119266911, + 17121187420 + ], + "samples_ts": [ + 29.9518, + 29.9078, + 29.9045 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 13 + }, + { + "timestamp_utc": "2025-12-08T20:36:26.525324+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:35:45Z\",\n \"avg_ns\": 7008650359,\n \"stddev_ns\": 1003246,\n \"avg_ts\": 73.052582,\n \"stddev_ts\": 0.010458,\n \"samples_ns\": [ 7009442518, 7007522251, 7008986308 ],\n \"samples_ts\": [ 73.0443, 73.0643, 73.0491 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:36:13Z\",\n \"avg_ns\": 4140625617,\n \"stddev_ns\": 174762,\n \"avg_ts\": 30.913203,\n \"stddev_ts\": 0.001114,\n \"samples_ns\": [ 4140531314, 4140797633, 4140547906 ],\n \"samples_ts\": [ 30.9139, 30.9119, 30.9138 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T20:35:45Z", + "avg_ns": 7008650359, + "stddev_ns": 1003246, + "avg_ts": 73.052582, + "stddev_ts": 0.010458, + "samples_ns": [ + 7009442518, + 7007522251, + 7008986308 + ], + "samples_ts": [ + 73.0443, + 73.0643, + 73.0491 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T20:36:13Z", + "avg_ns": 4140625617, + "stddev_ns": 174762, + "avg_ts": 30.913203, + "stddev_ts": 0.001114, + "samples_ns": [ + 4140531314, + 4140797633, + 4140547906 + ], + "samples_ts": [ + 30.9139, + 30.9119, + 30.9138 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 14 + }, + { + "timestamp_utc": "2025-12-08T20:37:46.111602+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:36:27Z\",\n \"avg_ns\": 6997039294,\n \"stddev_ns\": 575160,\n \"avg_ts\": 73.173807,\n \"stddev_ts\": 0.005951,\n \"samples_ns\": [ 6997246352, 6996395712, 6997475819 ],\n \"samples_ts\": [ 73.1716, 73.1805, 73.1692 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:36:55Z\",\n \"avg_ns\": 16937813621,\n \"stddev_ns\": 17060249,\n \"avg_ts\": 30.228243,\n \"stddev_ts\": 0.030464,\n \"samples_ns\": [ 16918115429, 16947517456, 16947807979 ],\n \"samples_ts\": [ 30.2634, 30.2109, 30.2104 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T20:36:27Z", + "avg_ns": 6997039294, + "stddev_ns": 575160, + "avg_ts": 73.173807, + "stddev_ts": 0.005951, + "samples_ns": [ + 6997246352, + 6996395712, + 6997475819 + ], + "samples_ts": [ + 73.1716, + 73.1805, + 73.1692 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T20:36:55Z", + "avg_ns": 16937813621, + "stddev_ns": 17060249, + "avg_ts": 30.228243, + "stddev_ts": 0.030464, + "samples_ns": [ + 16918115429, + 16947517456, + 16947807979 + ], + "samples_ts": [ + 30.2634, + 30.2109, + 30.2104 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 15 + }, + { + "timestamp_utc": "2025-12-08T20:38:06.099859+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:37:46Z\",\n \"avg_ns\": 1704734142,\n \"stddev_ns\": 47076,\n \"avg_ts\": 75.085022,\n \"stddev_ts\": 0.000996,\n \"samples_ns\": [ 1704759921, 1704717642, 1704724864 ],\n \"samples_ts\": [ 75.0839, 75.0857, 75.0854 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:37:53Z\",\n \"avg_ns\": 4130455572,\n \"stddev_ns\": 226292,\n \"avg_ts\": 30.989318,\n \"stddev_ts\": 0.001628,\n \"samples_ns\": [ 4130345102, 4130316064, 4130705551 ],\n \"samples_ts\": [ 30.9901, 30.9904, 30.9874 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T20:37:46Z", + "avg_ns": 1704734142, + "stddev_ns": 47076, + "avg_ts": 75.085022, + "stddev_ts": 0.000996, + "samples_ns": [ + 1704759921, + 1704717642, + 1704724864 + ], + "samples_ts": [ + 75.0839, + 75.0857, + 75.0854 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T20:37:53Z", + "avg_ns": 4130455572, + "stddev_ns": 226292, + "avg_ts": 30.989318, + "stddev_ts": 0.001628, + "samples_ns": [ + 4130345102, + 4130316064, + 4130705551 + ], + "samples_ts": [ + 30.9901, + 30.9904, + 30.9874 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 16 + }, + { + "timestamp_utc": "2025-12-08T20:39:05.191511+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:38:06Z\",\n \"avg_ns\": 1704274710,\n \"stddev_ns\": 269310,\n \"avg_ts\": 75.105264,\n \"stddev_ts\": 0.011867,\n \"samples_ns\": [ 1704094763, 1704145042, 1704584325 ],\n \"samples_ts\": [ 75.1132, 75.111, 75.0916 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:38:13Z\",\n \"avg_ns\": 17166102472,\n \"stddev_ns\": 30998701,\n \"avg_ts\": 29.826288,\n \"stddev_ts\": 0.053917,\n \"samples_ns\": [ 17130315284, 17184610389, 17183381743 ],\n \"samples_ts\": [ 29.8885, 29.7941, 29.7962 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T20:38:06Z", + "avg_ns": 1704274710, + "stddev_ns": 269310, + "avg_ts": 75.105264, + "stddev_ts": 0.011867, + "samples_ns": [ + 1704094763, + 1704145042, + 1704584325 + ], + "samples_ts": [ + 75.1132, + 75.111, + 75.0916 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T20:38:13Z", + "avg_ns": 17166102472, + "stddev_ns": 30998701, + "avg_ts": 29.826288, + "stddev_ts": 0.053917, + "samples_ns": [ + 17130315284, + 17184610389, + 17183381743 + ], + "samples_ts": [ + 29.8885, + 29.7941, + 29.7962 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 17 + }, + { + "timestamp_utc": "2025-12-08T20:39:46.443495+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:39:05Z\",\n \"avg_ns\": 7020994242,\n \"stddev_ns\": 636133,\n \"avg_ts\": 72.924145,\n \"stddev_ts\": 0.006550,\n \"samples_ns\": [ 7020297536, 7021525885, 7021159306 ],\n \"samples_ts\": [ 72.9314, 72.9186, 72.9224 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:39:33Z\",\n \"avg_ns\": 4130599370,\n \"stddev_ns\": 481995,\n \"avg_ts\": 30.988239,\n \"stddev_ts\": 0.003551,\n \"samples_ns\": [ 4130115599, 4131061556, 4130620957 ],\n \"samples_ts\": [ 30.9919, 30.9848, 30.9881 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T20:39:05Z", + "avg_ns": 7020994242, + "stddev_ns": 636133, + "avg_ts": 72.924145, + "stddev_ts": 0.00655, + "samples_ns": [ + 7020297536, + 7021525885, + 7021159306 + ], + "samples_ts": [ + 72.9314, + 72.9186, + 72.9224 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T20:39:33Z", + "avg_ns": 4130599370, + "stddev_ns": 481995, + "avg_ts": 30.988239, + "stddev_ts": 0.003551, + "samples_ns": [ + 4130115599, + 4131061556, + 4130620957 + ], + "samples_ts": [ + 30.9919, + 30.9848, + 30.9881 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 18 + }, + { + "timestamp_utc": "2025-12-08T20:41:06.733522+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:39:47Z\",\n \"avg_ns\": 7031929495,\n \"stddev_ns\": 510113,\n \"avg_ts\": 72.810742,\n \"stddev_ts\": 0.005210,\n \"samples_ns\": [ 7032346014, 7032072046, 7031370426 ],\n \"samples_ts\": [ 72.8064, 72.8093, 72.8165 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:40:15Z\",\n \"avg_ns\": 17126419706,\n \"stddev_ns\": 14453043,\n \"avg_ts\": 29.895346,\n \"stddev_ts\": 0.025219,\n \"samples_ns\": [ 17142790360, 17115425156, 17121043602 ],\n \"samples_ts\": [ 29.8668, 29.9145, 29.9047 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T20:39:47Z", + "avg_ns": 7031929495, + "stddev_ns": 510113, + "avg_ts": 72.810742, + "stddev_ts": 0.00521, + "samples_ns": [ + 7032346014, + 7032072046, + 7031370426 + ], + "samples_ts": [ + 72.8064, + 72.8093, + 72.8165 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T20:40:15Z", + "avg_ns": 17126419706, + "stddev_ns": 14453043, + "avg_ts": 29.895346, + "stddev_ts": 0.025219, + "samples_ns": [ + 17142790360, + 17115425156, + 17121043602 + ], + "samples_ts": [ + 29.8668, + 29.9145, + 29.9047 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 19 + }, + { + "timestamp_utc": "2025-12-08T20:41:26.750157+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:41:07Z\",\n \"avg_ns\": 1704095337,\n \"stddev_ns\": 345886,\n \"avg_ts\": 75.113170,\n \"stddev_ts\": 0.015245,\n \"samples_ns\": [ 1703771092, 1704055503, 1704459416 ],\n \"samples_ts\": [ 75.1275, 75.1149, 75.0971 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:41:14Z\",\n \"avg_ns\": 4142214979,\n \"stddev_ns\": 531827,\n \"avg_ts\": 30.901342,\n \"stddev_ts\": 0.003967,\n \"samples_ns\": [ 4142134317, 4142782530, 4141728090 ],\n \"samples_ts\": [ 30.9019, 30.8971, 30.905 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T20:41:07Z", + "avg_ns": 1704095337, + "stddev_ns": 345886, + "avg_ts": 75.11317, + "stddev_ts": 0.015245, + "samples_ns": [ + 1703771092, + 1704055503, + 1704459416 + ], + "samples_ts": [ + 75.1275, + 75.1149, + 75.0971 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T20:41:14Z", + "avg_ns": 4142214979, + "stddev_ns": 531827, + "avg_ts": 30.901342, + "stddev_ts": 0.003967, + "samples_ns": [ + 4142134317, + 4142782530, + 4141728090 + ], + "samples_ts": [ + 30.9019, + 30.8971, + 30.905 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 20 + }, + { + "timestamp_utc": "2025-12-08T20:42:25.053049+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:41:27Z\",\n \"avg_ns\": 1704052639,\n \"stddev_ns\": 413011,\n \"avg_ts\": 75.115053,\n \"stddev_ts\": 0.018207,\n \"samples_ns\": [ 1703599108, 1704407119, 1704151690 ],\n \"samples_ts\": [ 75.135, 75.0994, 75.1107 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:41:34Z\",\n \"avg_ns\": 16903511816,\n \"stddev_ns\": 602702,\n \"avg_ts\": 30.289564,\n \"stddev_ts\": 0.001029,\n \"samples_ns\": [ 16904087701, 16903507971, 16902939778 ],\n \"samples_ts\": [ 30.2885, 30.2896, 30.2906 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T20:41:27Z", + "avg_ns": 1704052639, + "stddev_ns": 413011, + "avg_ts": 75.115053, + "stddev_ts": 0.018207, + "samples_ns": [ + 1703599108, + 1704407119, + 1704151690 + ], + "samples_ts": [ + 75.135, + 75.0994, + 75.1107 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T20:41:34Z", + "avg_ns": 16903511816, + "stddev_ns": 602702, + "avg_ts": 30.289564, + "stddev_ts": 0.001029, + "samples_ns": [ + 16904087701, + 16903507971, + 16902939778 + ], + "samples_ts": [ + 30.2885, + 30.2896, + 30.2906 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 21 + }, + { + "timestamp_utc": "2025-12-08T20:43:07.375103+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:42:25Z\",\n \"avg_ns\": 7281718699,\n \"stddev_ns\": 1167709,\n \"avg_ts\": 70.313072,\n \"stddev_ts\": 0.011277,\n \"samples_ns\": [ 7282443956, 7282340473, 7280371668 ],\n \"samples_ts\": [ 70.3061, 70.3071, 70.3261 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:42:54Z\",\n \"avg_ns\": 4139053460,\n \"stddev_ns\": 665206,\n \"avg_ts\": 30.924945,\n \"stddev_ts\": 0.004924,\n \"samples_ns\": [ 4139195907, 4138334933, 4139629542 ],\n \"samples_ts\": [ 30.9239, 30.9303, 30.9206 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T20:42:25Z", + "avg_ns": 7281718699, + "stddev_ns": 1167709, + "avg_ts": 70.313072, + "stddev_ts": 0.011277, + "samples_ns": [ + 7282443956, + 7282340473, + 7280371668 + ], + "samples_ts": [ + 70.3061, + 70.3071, + 70.3261 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T20:42:54Z", + "avg_ns": 4139053460, + "stddev_ns": 665206, + "avg_ts": 30.924945, + "stddev_ts": 0.004924, + "samples_ns": [ + 4139195907, + 4138334933, + 4139629542 + ], + "samples_ts": [ + 30.9239, + 30.9303, + 30.9206 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 22 + }, + { + "timestamp_utc": "2025-12-08T20:44:28.595374+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:43:08Z\",\n \"avg_ns\": 7293532667,\n \"stddev_ns\": 6765835,\n \"avg_ts\": 70.199219,\n \"stddev_ts\": 0.065144,\n \"samples_ns\": [ 7285734871, 7297034431, 7297828701 ],\n \"samples_ts\": [ 70.2743, 70.1655, 70.1579 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:43:37Z\",\n \"avg_ns\": 17090149900,\n \"stddev_ns\": 47943505,\n \"avg_ts\": 29.958935,\n \"stddev_ts\": 0.083942,\n \"samples_ns\": [ 17143954042, 17074535676, 17051959982 ],\n \"samples_ts\": [ 29.8648, 29.9862, 30.0259 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T20:43:08Z", + "avg_ns": 7293532667, + "stddev_ns": 6765835, + "avg_ts": 70.199219, + "stddev_ts": 0.065144, + "samples_ns": [ + 7285734871, + 7297034431, + 7297828701 + ], + "samples_ts": [ + 70.2743, + 70.1655, + 70.1579 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T20:43:37Z", + "avg_ns": 17090149900, + "stddev_ns": 47943505, + "avg_ts": 29.958935, + "stddev_ts": 0.083942, + "samples_ns": [ + 17143954042, + 17074535676, + 17051959982 + ], + "samples_ts": [ + 29.8648, + 29.9862, + 30.0259 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 23 + }, + { + "timestamp_utc": "2025-12-08T20:44:48.609233+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:44:29Z\",\n \"avg_ns\": 1703813339,\n \"stddev_ns\": 120842,\n \"avg_ts\": 75.125601,\n \"stddev_ts\": 0.005328,\n \"samples_ns\": [ 1703840074, 1703918575, 1703681368 ],\n \"samples_ts\": [ 75.1244, 75.121, 75.1314 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:44:36Z\",\n \"avg_ns\": 4125481147,\n \"stddev_ns\": 881168,\n \"avg_ts\": 31.026685,\n \"stddev_ts\": 0.006626,\n \"samples_ns\": [ 4124800193, 4126476364, 4125166884 ],\n \"samples_ts\": [ 31.0318, 31.0192, 31.029 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T20:44:29Z", + "avg_ns": 1703813339, + "stddev_ns": 120842, + "avg_ts": 75.125601, + "stddev_ts": 0.005328, + "samples_ns": [ + 1703840074, + 1703918575, + 1703681368 + ], + "samples_ts": [ + 75.1244, + 75.121, + 75.1314 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T20:44:36Z", + "avg_ns": 4125481147, + "stddev_ns": 881168, + "avg_ts": 31.026685, + "stddev_ts": 0.006626, + "samples_ns": [ + 4124800193, + 4126476364, + 4125166884 + ], + "samples_ts": [ + 31.0318, + 31.0192, + 31.029 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 24 + }, + { + "timestamp_utc": "2025-12-08T20:45:47.535321+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:44:49Z\",\n \"avg_ns\": 1703837389,\n \"stddev_ns\": 274907,\n \"avg_ts\": 75.124541,\n \"stddev_ts\": 0.012122,\n \"samples_ns\": [ 1704083059, 1703888649, 1703540459 ],\n \"samples_ts\": [ 75.1137, 75.1223, 75.1376 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:44:56Z\",\n \"avg_ns\": 17095298920,\n \"stddev_ns\": 17637542,\n \"avg_ts\": 29.949775,\n \"stddev_ts\": 0.030881,\n \"samples_ns\": [ 17115545028, 17087078513, 17083273221 ],\n \"samples_ts\": [ 29.9143, 29.9642, 29.9708 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T20:44:49Z", + "avg_ns": 1703837389, + "stddev_ns": 274907, + "avg_ts": 75.124541, + "stddev_ts": 0.012122, + "samples_ns": [ + 1704083059, + 1703888649, + 1703540459 + ], + "samples_ts": [ + 75.1137, + 75.1223, + 75.1376 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T20:44:56Z", + "avg_ns": 17095298920, + "stddev_ns": 17637542, + "avg_ts": 29.949775, + "stddev_ts": 0.030881, + "samples_ns": [ + 17115545028, + 17087078513, + 17083273221 + ], + "samples_ts": [ + 29.9143, + 29.9642, + 29.9708 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 25 + }, + { + "timestamp_utc": "2025-12-08T20:46:28.668336+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:45:48Z\",\n \"avg_ns\": 6997944245,\n \"stddev_ns\": 715674,\n \"avg_ts\": 73.164345,\n \"stddev_ts\": 0.007482,\n \"samples_ns\": [ 6997571783, 6998769337, 6997491615 ],\n \"samples_ts\": [ 73.1682, 73.1557, 73.1691 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:46:16Z\",\n \"avg_ns\": 4117905939,\n \"stddev_ns\": 696880,\n \"avg_ts\": 31.083761,\n \"stddev_ts\": 0.005215,\n \"samples_ns\": [ 4117413945, 4117608004, 4118695870 ],\n \"samples_ts\": [ 31.0875, 31.086, 31.0778 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T20:45:48Z", + "avg_ns": 6997944245, + "stddev_ns": 715674, + "avg_ts": 73.164345, + "stddev_ts": 0.007482, + "samples_ns": [ + 6997571783, + 6998769337, + 6997491615 + ], + "samples_ts": [ + 73.1682, + 73.1557, + 73.1691 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T20:46:16Z", + "avg_ns": 4117905939, + "stddev_ns": 696880, + "avg_ts": 31.083761, + "stddev_ts": 0.005215, + "samples_ns": [ + 4117413945, + 4117608004, + 4118695870 + ], + "samples_ts": [ + 31.0875, + 31.086, + 31.0778 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 26 + }, + { + "timestamp_utc": "2025-12-08T20:47:48.451782+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:46:29Z\",\n \"avg_ns\": 6998207063,\n \"stddev_ns\": 148542,\n \"avg_ts\": 73.161596,\n \"stddev_ts\": 0.001283,\n \"samples_ns\": [ 6998067345, 6998256292, 6998297553 ],\n \"samples_ts\": [ 73.1631, 73.1611, 73.1607 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:46:57Z\",\n \"avg_ns\": 16995962723,\n \"stddev_ns\": 30526396,\n \"avg_ts\": 30.124866,\n \"stddev_ts\": 0.054051,\n \"samples_ns\": [ 17031209201, 16978668489, 16978010480 ],\n \"samples_ts\": [ 30.0625, 30.1555, 30.1567 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T20:46:29Z", + "avg_ns": 6998207063, + "stddev_ns": 148542, + "avg_ts": 73.161596, + "stddev_ts": 0.001283, + "samples_ns": [ + 6998067345, + 6998256292, + 6998297553 + ], + "samples_ts": [ + 73.1631, + 73.1611, + 73.1607 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T20:46:57Z", + "avg_ns": 16995962723, + "stddev_ns": 30526396, + "avg_ts": 30.124866, + "stddev_ts": 0.054051, + "samples_ns": [ + 17031209201, + 16978668489, + 16978010480 + ], + "samples_ts": [ + 30.0625, + 30.1555, + 30.1567 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 27 + }, + { + "timestamp_utc": "2025-12-08T20:48:08.457347+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:47:49Z\",\n \"avg_ns\": 1703098002,\n \"stddev_ns\": 179191,\n \"avg_ts\": 75.157155,\n \"stddev_ts\": 0.007696,\n \"samples_ns\": [ 1703192325, 1703204899, 1702896783 ],\n \"samples_ts\": [ 75.153, 75.1524, 75.166 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:47:55Z\",\n \"avg_ns\": 4138012879,\n \"stddev_ns\": 857528,\n \"avg_ts\": 30.932722,\n \"stddev_ts\": 0.006411,\n \"samples_ns\": [ 4138342192, 4138656937, 4137039508 ],\n \"samples_ts\": [ 30.9303, 30.9279, 30.94 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T20:47:49Z", + "avg_ns": 1703098002, + "stddev_ns": 179191, + "avg_ts": 75.157155, + "stddev_ts": 0.007696, + "samples_ns": [ + 1703192325, + 1703204899, + 1702896783 + ], + "samples_ts": [ + 75.153, + 75.1524, + 75.166 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T20:47:55Z", + "avg_ns": 4138012879, + "stddev_ns": 857528, + "avg_ts": 30.932722, + "stddev_ts": 0.006411, + "samples_ns": [ + 4138342192, + 4138656937, + 4137039508 + ], + "samples_ts": [ + 30.9303, + 30.9279, + 30.94 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 28 + }, + { + "timestamp_utc": "2025-12-08T20:49:07.449694+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:48:09Z\",\n \"avg_ns\": 1704012904,\n \"stddev_ns\": 222859,\n \"avg_ts\": 75.116803,\n \"stddev_ts\": 0.009825,\n \"samples_ns\": [ 1704199839, 1704072596, 1703766277 ],\n \"samples_ts\": [ 75.1086, 75.1142, 75.1277 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:48:15Z\",\n \"avg_ns\": 17133240267,\n \"stddev_ns\": 13711311,\n \"avg_ts\": 29.883444,\n \"stddev_ts\": 0.023903,\n \"samples_ns\": [ 17148969592, 17126932589, 17123818621 ],\n \"samples_ts\": [ 29.856, 29.8944, 29.8999 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T20:48:09Z", + "avg_ns": 1704012904, + "stddev_ns": 222859, + "avg_ts": 75.116803, + "stddev_ts": 0.009825, + "samples_ns": [ + 1704199839, + 1704072596, + 1703766277 + ], + "samples_ts": [ + 75.1086, + 75.1142, + 75.1277 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T20:48:15Z", + "avg_ns": 17133240267, + "stddev_ns": 13711311, + "avg_ts": 29.883444, + "stddev_ts": 0.023903, + "samples_ns": [ + 17148969592, + 17126932589, + 17123818621 + ], + "samples_ts": [ + 29.856, + 29.8944, + 29.8999 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 29 + }, + { + "timestamp_utc": "2025-12-08T20:49:48.701162+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:49:08Z\",\n \"avg_ns\": 7034202421,\n \"stddev_ns\": 533949,\n \"avg_ts\": 72.787215,\n \"stddev_ts\": 0.005456,\n \"samples_ns\": [ 7034171409, 7034744565, 7033691290 ],\n \"samples_ts\": [ 72.7875, 72.7816, 72.7925 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:49:36Z\",\n \"avg_ns\": 4107693634,\n \"stddev_ns\": 354397,\n \"avg_ts\": 31.161039,\n \"stddev_ts\": 0.002599,\n \"samples_ns\": [ 4107803352, 4107967947, 4107309605 ],\n \"samples_ts\": [ 31.1602, 31.159, 31.164 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T20:49:08Z", + "avg_ns": 7034202421, + "stddev_ns": 533949, + "avg_ts": 72.787215, + "stddev_ts": 0.005456, + "samples_ns": [ + 7034171409, + 7034744565, + 7033691290 + ], + "samples_ts": [ + 72.7875, + 72.7816, + 72.7925 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T20:49:36Z", + "avg_ns": 4107693634, + "stddev_ns": 354397, + "avg_ts": 31.161039, + "stddev_ts": 0.002599, + "samples_ns": [ + 4107803352, + 4107967947, + 4107309605 + ], + "samples_ts": [ + 31.1602, + 31.159, + 31.164 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 30 + }, + { + "timestamp_utc": "2025-12-08T20:51:08.757076+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:49:49Z\",\n \"avg_ns\": 7032072003,\n \"stddev_ns\": 769140,\n \"avg_ts\": 72.809266,\n \"stddev_ts\": 0.007868,\n \"samples_ns\": [ 7031336204, 7032853983, 7032025824 ],\n \"samples_ts\": [ 72.8169, 72.8012, 72.8097 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:50:17Z\",\n \"avg_ns\": 17034894423,\n \"stddev_ns\": 42890901,\n \"avg_ts\": 30.056081,\n \"stddev_ts\": 0.075785,\n \"samples_ns\": [ 16985391191, 17060944066, 17058348013 ],\n \"samples_ts\": [ 30.1436, 30.0101, 30.0146 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T20:49:49Z", + "avg_ns": 7032072003, + "stddev_ns": 769140, + "avg_ts": 72.809266, + "stddev_ts": 0.007868, + "samples_ns": [ + 7031336204, + 7032853983, + 7032025824 + ], + "samples_ts": [ + 72.8169, + 72.8012, + 72.8097 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T20:50:17Z", + "avg_ns": 17034894423, + "stddev_ns": 42890901, + "avg_ts": 30.056081, + "stddev_ts": 0.075785, + "samples_ns": [ + 16985391191, + 17060944066, + 17058348013 + ], + "samples_ts": [ + 30.1436, + 30.0101, + 30.0146 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 31 + }, + { + "timestamp_utc": "2025-12-08T20:51:28.756444+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:51:09Z\",\n \"avg_ns\": 1702700787,\n \"stddev_ns\": 167271,\n \"avg_ts\": 75.174688,\n \"stddev_ts\": 0.007157,\n \"samples_ns\": [ 1702518834, 1702829801, 1702753727 ],\n \"samples_ts\": [ 75.1827, 75.169, 75.1724 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:51:16Z\",\n \"avg_ns\": 4133581855,\n \"stddev_ns\": 749758,\n \"avg_ts\": 30.965881,\n \"stddev_ts\": 0.005575,\n \"samples_ns\": [ 4134427324, 4133025861, 4133292382 ],\n \"samples_ts\": [ 30.9595, 30.97, 30.968 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T20:51:09Z", + "avg_ns": 1702700787, + "stddev_ns": 167271, + "avg_ts": 75.174688, + "stddev_ts": 0.007157, + "samples_ns": [ + 1702518834, + 1702829801, + 1702753727 + ], + "samples_ts": [ + 75.1827, + 75.169, + 75.1724 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T20:51:16Z", + "avg_ns": 4133581855, + "stddev_ns": 749758, + "avg_ts": 30.965881, + "stddev_ts": 0.005575, + "samples_ns": [ + 4134427324, + 4133025861, + 4133292382 + ], + "samples_ts": [ + 30.9595, + 30.97, + 30.968 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 32 + }, + { + "timestamp_utc": "2025-12-08T20:52:27.326517+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:51:29Z\",\n \"avg_ns\": 1705490035,\n \"stddev_ns\": 172896,\n \"avg_ts\": 75.051744,\n \"stddev_ts\": 0.007609,\n \"samples_ns\": [ 1705303465, 1705521782, 1705644858 ],\n \"samples_ts\": [ 75.06, 75.0503, 75.0449 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:51:36Z\",\n \"avg_ns\": 16988574438,\n \"stddev_ns\": 31374232,\n \"avg_ts\": 30.137971,\n \"stddev_ts\": 0.055599,\n \"samples_ns\": [ 17024769410, 16969147101, 16971806804 ],\n \"samples_ts\": [ 30.0738, 30.1724, 30.1677 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T20:51:29Z", + "avg_ns": 1705490035, + "stddev_ns": 172896, + "avg_ts": 75.051744, + "stddev_ts": 0.007609, + "samples_ns": [ + 1705303465, + 1705521782, + 1705644858 + ], + "samples_ts": [ + 75.06, + 75.0503, + 75.0449 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T20:51:36Z", + "avg_ns": 16988574438, + "stddev_ns": 31374232, + "avg_ts": 30.137971, + "stddev_ts": 0.055599, + "samples_ns": [ + 17024769410, + 16969147101, + 16971806804 + ], + "samples_ts": [ + 30.0738, + 30.1724, + 30.1677 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 33 + }, + { + "timestamp_utc": "2025-12-08T20:53:09.728083+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:52:28Z\",\n \"avg_ns\": 7289896168,\n \"stddev_ns\": 790150,\n \"avg_ts\": 70.234197,\n \"stddev_ts\": 0.007613,\n \"samples_ns\": [ 7289021730, 7290107875, 7290558899 ],\n \"samples_ts\": [ 70.2426, 70.2322, 70.2278 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:52:57Z\",\n \"avg_ns\": 4138514070,\n \"stddev_ns\": 841216,\n \"avg_ts\": 30.928976,\n \"stddev_ts\": 0.006250,\n \"samples_ns\": [ 4138681400, 4139254038, 4137606774 ],\n \"samples_ts\": [ 30.9277, 30.9234, 30.9358 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T20:52:28Z", + "avg_ns": 7289896168, + "stddev_ns": 790150, + "avg_ts": 70.234197, + "stddev_ts": 0.007613, + "samples_ns": [ + 7289021730, + 7290107875, + 7290558899 + ], + "samples_ts": [ + 70.2426, + 70.2322, + 70.2278 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T20:52:57Z", + "avg_ns": 4138514070, + "stddev_ns": 841216, + "avg_ts": 30.928976, + "stddev_ts": 0.00625, + "samples_ns": [ + 4138681400, + 4139254038, + 4137606774 + ], + "samples_ts": [ + 30.9277, + 30.9234, + 30.9358 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 34 + }, + { + "timestamp_utc": "2025-12-08T20:54:30.230477+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:53:10Z\",\n \"avg_ns\": 7268279264,\n \"stddev_ns\": 787263,\n \"avg_ts\": 70.443084,\n \"stddev_ts\": 0.007630,\n \"samples_ns\": [ 7268625935, 7268833698, 7267378159 ],\n \"samples_ts\": [ 70.4397, 70.4377, 70.4518 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:53:39Z\",\n \"avg_ns\": 16881600385,\n \"stddev_ns\": 2347077,\n \"avg_ts\": 30.328878,\n \"stddev_ts\": 0.004217,\n \"samples_ns\": [ 16884055079, 16881367839, 16879378237 ],\n \"samples_ts\": [ 30.3245, 30.3293, 30.3329 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T20:53:10Z", + "avg_ns": 7268279264, + "stddev_ns": 787263, + "avg_ts": 70.443084, + "stddev_ts": 0.00763, + "samples_ns": [ + 7268625935, + 7268833698, + 7267378159 + ], + "samples_ts": [ + 70.4397, + 70.4377, + 70.4518 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T20:53:39Z", + "avg_ns": 16881600385, + "stddev_ns": 2347077, + "avg_ts": 30.328878, + "stddev_ts": 0.004217, + "samples_ns": [ + 16884055079, + 16881367839, + 16879378237 + ], + "samples_ts": [ + 30.3245, + 30.3293, + 30.3329 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 35 + }, + { + "timestamp_utc": "2025-12-08T20:54:43.821146+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:54:30Z\",\n \"avg_ns\": 872073402,\n \"stddev_ns\": 105546,\n \"avg_ts\": 146.776637,\n \"stddev_ts\": 0.016314,\n \"samples_ns\": [ 871992846, 872180978, 872046384 ],\n \"samples_ts\": [ 146.79, 146.759, 146.781 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:54:34Z\",\n \"avg_ns\": 3109714224,\n \"stddev_ns\": 2210701,\n \"avg_ts\": 41.161352,\n \"stddev_ts\": 0.029255,\n \"samples_ns\": [ 3107418545, 3109896931, 3111827197 ],\n \"samples_ts\": [ 41.1917, 41.1589, 41.1334 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T20:54:30Z", + "avg_ns": 872073402, + "stddev_ns": 105546, + "avg_ts": 146.776637, + "stddev_ts": 0.016314, + "samples_ns": [ + 871992846, + 872180978, + 872046384 + ], + "samples_ts": [ + 146.79, + 146.759, + 146.781 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T20:54:34Z", + "avg_ns": 3109714224, + "stddev_ns": 2210701, + "avg_ts": 41.161352, + "stddev_ts": 0.029255, + "samples_ns": [ + 3107418545, + 3109896931, + 3111827197 + ], + "samples_ts": [ + 41.1917, + 41.1589, + 41.1334 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 36 + }, + { + "timestamp_utc": "2025-12-08T20:55:26.252325+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:54:44Z\",\n \"avg_ns\": 872033111,\n \"stddev_ns\": 77389,\n \"avg_ts\": 146.783418,\n \"stddev_ts\": 0.010967,\n \"samples_ns\": [ 872108292, 871997975, 871993068 ],\n \"samples_ts\": [ 146.771, 146.789, 146.79 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:54:47Z\",\n \"avg_ns\": 12725229368,\n \"stddev_ns\": 9461001,\n \"avg_ts\": 40.235046,\n \"stddev_ts\": 0.029915,\n \"samples_ns\": [ 12715145269, 12733907116, 12726635721 ],\n \"samples_ts\": [ 40.2669, 40.2076, 40.2306 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T20:54:44Z", + "avg_ns": 872033111, + "stddev_ns": 77389, + "avg_ts": 146.783418, + "stddev_ts": 0.010967, + "samples_ns": [ + 872108292, + 871997975, + 871993068 + ], + "samples_ts": [ + 146.771, + 146.789, + 146.79 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T20:54:47Z", + "avg_ns": 12725229368, + "stddev_ns": 9461001, + "avg_ts": 40.235046, + "stddev_ts": 0.029915, + "samples_ns": [ + 12715145269, + 12733907116, + 12726635721 + ], + "samples_ts": [ + 40.2669, + 40.2076, + 40.2306 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 37 + }, + { + "timestamp_utc": "2025-12-08T20:55:50.633391+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:55:26Z\",\n \"avg_ns\": 3572203858,\n \"stddev_ns\": 418612,\n \"avg_ts\": 143.328887,\n \"stddev_ts\": 0.016795,\n \"samples_ns\": [ 3571932873, 3572685994, 3571992707 ],\n \"samples_ts\": [ 143.34, 143.31, 143.337 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:55:41Z\",\n \"avg_ns\": 3106605614,\n \"stddev_ns\": 769831,\n \"avg_ts\": 41.202528,\n \"stddev_ts\": 0.010184,\n \"samples_ns\": [ 3107358655, 3105823839, 3106634349 ],\n \"samples_ts\": [ 41.1925, 41.2129, 41.2021 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T20:55:26Z", + "avg_ns": 3572203858, + "stddev_ns": 418612, + "avg_ts": 143.328887, + "stddev_ts": 0.016795, + "samples_ns": [ + 3571932873, + 3572685994, + 3571992707 + ], + "samples_ts": [ + 143.34, + 143.31, + 143.337 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T20:55:41Z", + "avg_ns": 3106605614, + "stddev_ns": 769831, + "avg_ts": 41.202528, + "stddev_ts": 0.010184, + "samples_ns": [ + 3107358655, + 3105823839, + 3106634349 + ], + "samples_ts": [ + 41.1925, + 41.2129, + 41.2021 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 38 + }, + { + "timestamp_utc": "2025-12-08T20:56:43.664416+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:55:51Z\",\n \"avg_ns\": 3571229011,\n \"stddev_ns\": 178757,\n \"avg_ts\": 143.368011,\n \"stddev_ts\": 0.007176,\n \"samples_ns\": [ 3571435384, 3571122380, 3571129269 ],\n \"samples_ts\": [ 143.36, 143.372, 143.372 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:56:05Z\",\n \"avg_ns\": 12650059560,\n \"stddev_ns\": 1774588,\n \"avg_ts\": 40.474118,\n \"stddev_ts\": 0.005666,\n \"samples_ns\": [ 12649641900, 12648534700, 12652002081 ],\n \"samples_ts\": [ 40.4755, 40.479, 40.4679 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T20:55:51Z", + "avg_ns": 3571229011, + "stddev_ns": 178757, + "avg_ts": 143.368011, + "stddev_ts": 0.007176, + "samples_ns": [ + 3571435384, + 3571122380, + 3571129269 + ], + "samples_ts": [ + 143.36, + 143.372, + 143.372 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T20:56:05Z", + "avg_ns": 12650059560, + "stddev_ns": 1774588, + "avg_ts": 40.474118, + "stddev_ts": 0.005666, + "samples_ns": [ + 12649641900, + 12648534700, + 12652002081 + ], + "samples_ts": [ + 40.4755, + 40.479, + 40.4679 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 39 + }, + { + "timestamp_utc": "2025-12-08T20:56:57.261774+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:56:44Z\",\n \"avg_ns\": 874367434,\n \"stddev_ns\": 159000,\n \"avg_ts\": 146.391549,\n \"stddev_ts\": 0.026622,\n \"samples_ns\": [ 874510843, 874395009, 874196450 ],\n \"samples_ts\": [ 146.368, 146.387, 146.42 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:56:47Z\",\n \"avg_ns\": 3108093499,\n \"stddev_ns\": 712088,\n \"avg_ts\": 41.182804,\n \"stddev_ts\": 0.009406,\n \"samples_ns\": [ 3107405842, 3108050925, 3108823731 ],\n \"samples_ts\": [ 41.1919, 41.1834, 41.1731 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T20:56:44Z", + "avg_ns": 874367434, + "stddev_ns": 159000, + "avg_ts": 146.391549, + "stddev_ts": 0.026622, + "samples_ns": [ + 874510843, + 874395009, + 874196450 + ], + "samples_ts": [ + 146.368, + 146.387, + 146.42 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T20:56:47Z", + "avg_ns": 3108093499, + "stddev_ns": 712088, + "avg_ts": 41.182804, + "stddev_ts": 0.009406, + "samples_ns": [ + 3107405842, + 3108050925, + 3108823731 + ], + "samples_ts": [ + 41.1919, + 41.1834, + 41.1731 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 40 + }, + { + "timestamp_utc": "2025-12-08T20:57:39.600060+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:56:57Z\",\n \"avg_ns\": 872626446,\n \"stddev_ns\": 43030,\n \"avg_ts\": 146.683613,\n \"stddev_ts\": 0.001734,\n \"samples_ns\": [ 872634354, 872614780, 872630206 ],\n \"samples_ts\": [ 146.682, 146.686, 146.683 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:57:01Z\",\n \"avg_ns\": 12689302629,\n \"stddev_ns\": 8272322,\n \"avg_ts\": 40.348958,\n \"stddev_ts\": 0.026300,\n \"samples_ns\": [ 12681361312, 12697868964, 12688677612 ],\n \"samples_ts\": [ 40.3742, 40.3217, 40.3509 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T20:56:57Z", + "avg_ns": 872626446, + "stddev_ns": 43030, + "avg_ts": 146.683613, + "stddev_ts": 0.001734, + "samples_ns": [ + 872634354, + 872614780, + 872630206 + ], + "samples_ts": [ + 146.682, + 146.686, + 146.683 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T20:57:01Z", + "avg_ns": 12689302629, + "stddev_ns": 8272322, + "avg_ts": 40.348958, + "stddev_ts": 0.0263, + "samples_ns": [ + 12681361312, + 12697868964, + 12688677612 + ], + "samples_ts": [ + 40.3742, + 40.3217, + 40.3509 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 41 + }, + { + "timestamp_utc": "2025-12-08T20:58:04.080599+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:57:40Z\",\n \"avg_ns\": 3600014972,\n \"stddev_ns\": 388755,\n \"avg_ts\": 142.221632,\n \"stddev_ts\": 0.015174,\n \"samples_ns\": [ 3600002936, 3600404946, 3599637035 ],\n \"samples_ts\": [ 142.222, 142.206, 142.237 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:57:54Z\",\n \"avg_ns\": 3102364313,\n \"stddev_ns\": 498456,\n \"avg_ts\": 41.258856,\n \"stddev_ts\": 0.006630,\n \"samples_ns\": [ 3102562408, 3102733269, 3101797262 ],\n \"samples_ts\": [ 41.2562, 41.2539, 41.2664 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T20:57:40Z", + "avg_ns": 3600014972, + "stddev_ns": 388755, + "avg_ts": 142.221632, + "stddev_ts": 0.015174, + "samples_ns": [ + 3600002936, + 3600404946, + 3599637035 + ], + "samples_ts": [ + 142.222, + 142.206, + 142.237 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T20:57:54Z", + "avg_ns": 3102364313, + "stddev_ns": 498456, + "avg_ts": 41.258856, + "stddev_ts": 0.00663, + "samples_ns": [ + 3102562408, + 3102733269, + 3101797262 + ], + "samples_ts": [ + 41.2562, + 41.2539, + 41.2664 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 42 + }, + { + "timestamp_utc": "2025-12-08T20:58:57.448581+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:58:04Z\",\n \"avg_ns\": 3599569232,\n \"stddev_ns\": 472670,\n \"avg_ts\": 142.239244,\n \"stddev_ts\": 0.018528,\n \"samples_ns\": [ 3599057454, 3599978028, 3599672215 ],\n \"samples_ts\": [ 142.259, 142.223, 142.235 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:58:19Z\",\n \"avg_ns\": 12730146253,\n \"stddev_ns\": 5849413,\n \"avg_ts\": 40.219496,\n \"stddev_ts\": 0.018478,\n \"samples_ns\": [ 12723435885, 12732854607, 12734148269 ],\n \"samples_ts\": [ 40.2407, 40.2109, 40.2069 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T20:58:04Z", + "avg_ns": 3599569232, + "stddev_ns": 472670, + "avg_ts": 142.239244, + "stddev_ts": 0.018528, + "samples_ns": [ + 3599057454, + 3599978028, + 3599672215 + ], + "samples_ts": [ + 142.259, + 142.223, + 142.235 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T20:58:19Z", + "avg_ns": 12730146253, + "stddev_ns": 5849413, + "avg_ts": 40.219496, + "stddev_ts": 0.018478, + "samples_ns": [ + 12723435885, + 12732854607, + 12734148269 + ], + "samples_ts": [ + 40.2407, + 40.2109, + 40.2069 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 43 + }, + { + "timestamp_utc": "2025-12-08T20:59:11.020082+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:58:58Z\",\n \"avg_ns\": 872306871,\n \"stddev_ns\": 119174,\n \"avg_ts\": 146.737353,\n \"stddev_ts\": 0.019423,\n \"samples_ns\": [ 872389182, 872174892, 872356540 ],\n \"samples_ts\": [ 146.724, 146.76, 146.729 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:59:01Z\",\n \"avg_ns\": 3095021122,\n \"stddev_ns\": 507182,\n \"avg_ts\": 41.356746,\n \"stddev_ts\": 0.006777,\n \"samples_ns\": [ 3095604939, 3094769245, 3094689182 ],\n \"samples_ts\": [ 41.3489, 41.3601, 41.3612 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T20:58:58Z", + "avg_ns": 872306871, + "stddev_ns": 119174, + "avg_ts": 146.737353, + "stddev_ts": 0.019423, + "samples_ns": [ + 872389182, + 872174892, + 872356540 + ], + "samples_ts": [ + 146.724, + 146.76, + 146.729 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T20:59:01Z", + "avg_ns": 3095021122, + "stddev_ns": 507182, + "avg_ts": 41.356746, + "stddev_ts": 0.006777, + "samples_ns": [ + 3095604939, + 3094769245, + 3094689182 + ], + "samples_ts": [ + 41.3489, + 41.3601, + 41.3612 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 44 + }, + { + "timestamp_utc": "2025-12-08T20:59:53.327708+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:59:11Z\",\n \"avg_ns\": 872392780,\n \"stddev_ns\": 57059,\n \"avg_ts\": 146.722902,\n \"stddev_ts\": 0.008211,\n \"samples_ns\": [ 872340884, 872437792, 872399665 ],\n \"samples_ts\": [ 146.732, 146.715, 146.722 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:59:15Z\",\n \"avg_ns\": 12681116834,\n \"stddev_ns\": 1653963,\n \"avg_ts\": 40.374993,\n \"stddev_ts\": 0.005266,\n \"samples_ns\": [ 12679337322, 12682607088, 12681406092 ],\n \"samples_ts\": [ 40.3807, 40.3702, 40.3741 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T20:59:11Z", + "avg_ns": 872392780, + "stddev_ns": 57059, + "avg_ts": 146.722902, + "stddev_ts": 0.008211, + "samples_ns": [ + 872340884, + 872437792, + 872399665 + ], + "samples_ts": [ + 146.732, + 146.715, + 146.722 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T20:59:15Z", + "avg_ns": 12681116834, + "stddev_ns": 1653963, + "avg_ts": 40.374993, + "stddev_ts": 0.005266, + "samples_ns": [ + 12679337322, + 12682607088, + 12681406092 + ], + "samples_ts": [ + 40.3807, + 40.3702, + 40.3741 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 45 + }, + { + "timestamp_utc": "2025-12-08T21:00:18.396021+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T20:59:53Z\",\n \"avg_ns\": 3736275800,\n \"stddev_ns\": 1155368,\n \"avg_ts\": 137.034860,\n \"stddev_ts\": 0.042375,\n \"samples_ns\": [ 3737442573, 3735132184, 3736252643 ],\n \"samples_ts\": [ 136.992, 137.077, 137.036 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:00:08Z\",\n \"avg_ns\": 3113956767,\n \"stddev_ns\": 1462826,\n \"avg_ts\": 41.105265,\n \"stddev_ts\": 0.019314,\n \"samples_ns\": [ 3115086468, 3114479433, 3112304400 ],\n \"samples_ts\": [ 41.0904, 41.0984, 41.1271 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T20:59:53Z", + "avg_ns": 3736275800, + "stddev_ns": 1155368, + "avg_ts": 137.03486, + "stddev_ts": 0.042375, + "samples_ns": [ + 3737442573, + 3735132184, + 3736252643 + ], + "samples_ts": [ + 136.992, + 137.077, + 137.036 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T21:00:08Z", + "avg_ns": 3113956767, + "stddev_ns": 1462826, + "avg_ts": 41.105265, + "stddev_ts": 0.019314, + "samples_ns": [ + 3115086468, + 3114479433, + 3112304400 + ], + "samples_ts": [ + 41.0904, + 41.0984, + 41.1271 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 46 + }, + { + "timestamp_utc": "2025-12-08T21:01:12.125056+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:00:19Z\",\n \"avg_ns\": 3733892814,\n \"stddev_ns\": 369001,\n \"avg_ts\": 137.122309,\n \"stddev_ts\": 0.013551,\n \"samples_ns\": [ 3733924281, 3733509087, 3734245074 ],\n \"samples_ts\": [ 137.121, 137.136, 137.109 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:00:34Z\",\n \"avg_ns\": 12671400170,\n \"stddev_ns\": 8811570,\n \"avg_ts\": 40.405966,\n \"stddev_ts\": 0.028093,\n \"samples_ns\": [ 12680915481, 12663521952, 12669763077 ],\n \"samples_ts\": [ 40.3756, 40.4311, 40.4112 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:00:19Z", + "avg_ns": 3733892814, + "stddev_ns": 369001, + "avg_ts": 137.122309, + "stddev_ts": 0.013551, + "samples_ns": [ + 3733924281, + 3733509087, + 3734245074 + ], + "samples_ts": [ + 137.121, + 137.136, + 137.109 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T21:00:34Z", + "avg_ns": 12671400170, + "stddev_ns": 8811570, + "avg_ts": 40.405966, + "stddev_ts": 0.028093, + "samples_ns": [ + 12680915481, + 12663521952, + 12669763077 + ], + "samples_ts": [ + 40.3756, + 40.4311, + 40.4112 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 47 + }, + { + "timestamp_utc": "2025-12-08T21:01:25.786891+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:01:12Z\",\n \"avg_ns\": 872791340,\n \"stddev_ns\": 85218,\n \"avg_ts\": 146.655901,\n \"stddev_ts\": 0.012480,\n \"samples_ns\": [ 872732872, 872766236, 872874914 ],\n \"samples_ts\": [ 146.666, 146.66, 146.642 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:01:16Z\",\n \"avg_ns\": 3117834535,\n \"stddev_ns\": 5003343,\n \"avg_ts\": 41.054205,\n \"stddev_ts\": 0.065845,\n \"samples_ns\": [ 3113556648, 3116610711, 3123336246 ],\n \"samples_ts\": [ 41.1105, 41.0703, 40.9818 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:01:12Z", + "avg_ns": 872791340, + "stddev_ns": 85218, + "avg_ts": 146.655901, + "stddev_ts": 0.01248, + "samples_ns": [ + 872732872, + 872766236, + 872874914 + ], + "samples_ts": [ + 146.666, + 146.66, + 146.642 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T21:01:16Z", + "avg_ns": 3117834535, + "stddev_ns": 5003343, + "avg_ts": 41.054205, + "stddev_ts": 0.065845, + "samples_ns": [ + 3113556648, + 3116610711, + 3123336246 + ], + "samples_ts": [ + 41.1105, + 41.0703, + 40.9818 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 48 + }, + { + "timestamp_utc": "2025-12-08T21:02:08.095387+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:01:26Z\",\n \"avg_ns\": 872058144,\n \"stddev_ns\": 130829,\n \"avg_ts\": 146.779206,\n \"stddev_ts\": 0.020869,\n \"samples_ns\": [ 872176144, 871928936, 872069354 ],\n \"samples_ts\": [ 146.759, 146.801, 146.777 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:01:29Z\",\n \"avg_ns\": 12673382412,\n \"stddev_ns\": 1889673,\n \"avg_ts\": 40.399634,\n \"stddev_ts\": 0.006024,\n \"samples_ns\": [ 12675400760, 12671655206, 12673091270 ],\n \"samples_ts\": [ 40.3932, 40.4051, 40.4006 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:01:26Z", + "avg_ns": 872058144, + "stddev_ns": 130829, + "avg_ts": 146.779206, + "stddev_ts": 0.020869, + "samples_ns": [ + 872176144, + 871928936, + 872069354 + ], + "samples_ts": [ + 146.759, + 146.801, + 146.777 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T21:01:29Z", + "avg_ns": 12673382412, + "stddev_ns": 1889673, + "avg_ts": 40.399634, + "stddev_ts": 0.006024, + "samples_ns": [ + 12675400760, + 12671655206, + 12673091270 + ], + "samples_ts": [ + 40.3932, + 40.4051, + 40.4006 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 49 + }, + { + "timestamp_utc": "2025-12-08T21:02:32.533824+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:02:08Z\",\n \"avg_ns\": 3572540263,\n \"stddev_ns\": 381415,\n \"avg_ts\": 143.315391,\n \"stddev_ts\": 0.015300,\n \"samples_ns\": [ 3572468658, 3572952406, 3572199725 ],\n \"samples_ts\": [ 143.318, 143.299, 143.329 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:02:23Z\",\n \"avg_ns\": 3124251241,\n \"stddev_ns\": 1205047,\n \"avg_ts\": 40.969821,\n \"stddev_ts\": 0.015799,\n \"samples_ns\": [ 3123393750, 3125629024, 3123730949 ],\n \"samples_ts\": [ 40.9811, 40.9518, 40.9766 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:02:08Z", + "avg_ns": 3572540263, + "stddev_ns": 381415, + "avg_ts": 143.315391, + "stddev_ts": 0.0153, + "samples_ns": [ + 3572468658, + 3572952406, + 3572199725 + ], + "samples_ts": [ + 143.318, + 143.299, + 143.329 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T21:02:23Z", + "avg_ns": 3124251241, + "stddev_ns": 1205047, + "avg_ts": 40.969821, + "stddev_ts": 0.015799, + "samples_ns": [ + 3123393750, + 3125629024, + 3123730949 + ], + "samples_ts": [ + 40.9811, + 40.9518, + 40.9766 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 50 + }, + { + "timestamp_utc": "2025-12-08T21:03:25.669177+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:02:33Z\",\n \"avg_ns\": 3572171258,\n \"stddev_ns\": 185473,\n \"avg_ts\": 143.330194,\n \"stddev_ts\": 0.007442,\n \"samples_ns\": [ 3571980172, 3572350556, 3572183046 ],\n \"samples_ts\": [ 143.338, 143.323, 143.33 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:02:47Z\",\n \"avg_ns\": 12682023970,\n \"stddev_ns\": 2152672,\n \"avg_ts\": 40.372106,\n \"stddev_ts\": 0.006833,\n \"samples_ns\": [ 12684487096, 12681034060, 12680550756 ],\n \"samples_ts\": [ 40.3643, 40.3753, 40.3768 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:02:33Z", + "avg_ns": 3572171258, + "stddev_ns": 185473, + "avg_ts": 143.330194, + "stddev_ts": 0.007442, + "samples_ns": [ + 3571980172, + 3572350556, + 3572183046 + ], + "samples_ts": [ + 143.338, + 143.323, + 143.33 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T21:02:47Z", + "avg_ns": 12682023970, + "stddev_ns": 2152672, + "avg_ts": 40.372106, + "stddev_ts": 0.006833, + "samples_ns": [ + 12684487096, + 12681034060, + 12680550756 + ], + "samples_ts": [ + 40.3643, + 40.3753, + 40.3768 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 51 + }, + { + "timestamp_utc": "2025-12-08T21:03:39.279602+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:03:26Z\",\n \"avg_ns\": 872801027,\n \"stddev_ns\": 86626,\n \"avg_ts\": 146.654274,\n \"stddev_ts\": 0.013683,\n \"samples_ns\": [ 872811465, 872714879, 872876738 ],\n \"samples_ts\": [ 146.653, 146.669, 146.642 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:03:29Z\",\n \"avg_ns\": 3114050561,\n \"stddev_ns\": 1241419,\n \"avg_ts\": 41.104025,\n \"stddev_ts\": 0.016383,\n \"samples_ns\": [ 3113039557, 3113675991, 3115436135 ],\n \"samples_ts\": [ 41.1174, 41.109, 41.0857 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:03:26Z", + "avg_ns": 872801027, + "stddev_ns": 86626, + "avg_ts": 146.654274, + "stddev_ts": 0.013683, + "samples_ns": [ + 872811465, + 872714879, + 872876738 + ], + "samples_ts": [ + 146.653, + 146.669, + 146.642 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T21:03:29Z", + "avg_ns": 3114050561, + "stddev_ns": 1241419, + "avg_ts": 41.104025, + "stddev_ts": 0.016383, + "samples_ns": [ + 3113039557, + 3113675991, + 3115436135 + ], + "samples_ts": [ + 41.1174, + 41.109, + 41.0857 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 52 + }, + { + "timestamp_utc": "2025-12-08T21:04:21.847213+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:03:39Z\",\n \"avg_ns\": 872204703,\n \"stddev_ns\": 110053,\n \"avg_ts\": 146.754541,\n \"stddev_ts\": 0.017838,\n \"samples_ns\": [ 872105662, 872316536, 872191912 ],\n \"samples_ts\": [ 146.771, 146.736, 146.757 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:03:43Z\",\n \"avg_ns\": 12764831120,\n \"stddev_ns\": 6122645,\n \"avg_ts\": 40.110212,\n \"stddev_ts\": 0.019236,\n \"samples_ns\": [ 12771584629, 12759643462, 12763265269 ],\n \"samples_ts\": [ 40.089, 40.1265, 40.1151 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:03:39Z", + "avg_ns": 872204703, + "stddev_ns": 110053, + "avg_ts": 146.754541, + "stddev_ts": 0.017838, + "samples_ns": [ + 872105662, + 872316536, + 872191912 + ], + "samples_ts": [ + 146.771, + 146.736, + 146.757 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T21:03:43Z", + "avg_ns": 12764831120, + "stddev_ns": 6122645, + "avg_ts": 40.110212, + "stddev_ts": 0.019236, + "samples_ns": [ + 12771584629, + 12759643462, + 12763265269 + ], + "samples_ts": [ + 40.089, + 40.1265, + 40.1151 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 53 + }, + { + "timestamp_utc": "2025-12-08T21:04:46.444987+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:04:22Z\",\n \"avg_ns\": 3620284598,\n \"stddev_ns\": 328816,\n \"avg_ts\": 141.425346,\n \"stddev_ts\": 0.012629,\n \"samples_ns\": [ 3619922391, 3620543834, 3620387570 ],\n \"samples_ts\": [ 141.439, 141.415, 141.421 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:04:37Z\",\n \"avg_ns\": 3104391842,\n \"stddev_ns\": 1833840,\n \"avg_ts\": 41.231918,\n \"stddev_ts\": 0.024349,\n \"samples_ns\": [ 3103079015, 3103609390, 3106487121 ],\n \"samples_ts\": [ 41.2494, 41.2423, 41.2041 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:04:22Z", + "avg_ns": 3620284598, + "stddev_ns": 328816, + "avg_ts": 141.425346, + "stddev_ts": 0.012629, + "samples_ns": [ + 3619922391, + 3620543834, + 3620387570 + ], + "samples_ts": [ + 141.439, + 141.415, + 141.421 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T21:04:37Z", + "avg_ns": 3104391842, + "stddev_ns": 1833840, + "avg_ts": 41.231918, + "stddev_ts": 0.024349, + "samples_ns": [ + 3103079015, + 3103609390, + 3106487121 + ], + "samples_ts": [ + 41.2494, + 41.2423, + 41.2041 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 54 + }, + { + "timestamp_utc": "2025-12-08T21:05:39.806612+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:04:47Z\",\n \"avg_ns\": 3602433437,\n \"stddev_ns\": 1180086,\n \"avg_ts\": 142.126162,\n \"stddev_ts\": 0.046552,\n \"samples_ns\": [ 3603749728, 3601470108, 3602080475 ],\n \"samples_ts\": [ 142.074, 142.164, 142.14 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:05:01Z\",\n \"avg_ns\": 12724268346,\n \"stddev_ns\": 5720484,\n \"avg_ts\": 40.238075,\n \"stddev_ts\": 0.018091,\n \"samples_ns\": [ 12727859790, 12717672899, 12727272350 ],\n \"samples_ts\": [ 40.2267, 40.2589, 40.2286 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:04:47Z", + "avg_ns": 3602433437, + "stddev_ns": 1180086, + "avg_ts": 142.126162, + "stddev_ts": 0.046552, + "samples_ns": [ + 3603749728, + 3601470108, + 3602080475 + ], + "samples_ts": [ + 142.074, + 142.164, + 142.14 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T21:05:01Z", + "avg_ns": 12724268346, + "stddev_ns": 5720484, + "avg_ts": 40.238075, + "stddev_ts": 0.018091, + "samples_ns": [ + 12727859790, + 12717672899, + 12727272350 + ], + "samples_ts": [ + 40.2267, + 40.2589, + 40.2286 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 55 + }, + { + "timestamp_utc": "2025-12-08T21:05:53.360058+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:05:40Z\",\n \"avg_ns\": 872031450,\n \"stddev_ns\": 76046,\n \"avg_ts\": 146.783697,\n \"stddev_ts\": 0.010698,\n \"samples_ns\": [ 872055339, 872079598, 871959415 ],\n \"samples_ts\": [ 146.78, 146.776, 146.796 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:05:43Z\",\n \"avg_ns\": 3097169772,\n \"stddev_ns\": 958321,\n \"avg_ts\": 41.328057,\n \"stddev_ts\": 0.012786,\n \"samples_ns\": [ 3098257830, 3096451165, 3096800321 ],\n \"samples_ts\": [ 41.3135, 41.3376, 41.333 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:05:40Z", + "avg_ns": 872031450, + "stddev_ns": 76046, + "avg_ts": 146.783697, + "stddev_ts": 0.010698, + "samples_ns": [ + 872055339, + 872079598, + 871959415 + ], + "samples_ts": [ + 146.78, + 146.776, + 146.796 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T21:05:43Z", + "avg_ns": 3097169772, + "stddev_ns": 958321, + "avg_ts": 41.328057, + "stddev_ts": 0.012786, + "samples_ns": [ + 3098257830, + 3096451165, + 3096800321 + ], + "samples_ts": [ + 41.3135, + 41.3376, + 41.333 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 56 + }, + { + "timestamp_utc": "2025-12-08T21:06:35.839374+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:05:54Z\",\n \"avg_ns\": 884664503,\n \"stddev_ns\": 3447568,\n \"avg_ts\": 144.689085,\n \"stddev_ts\": 0.564588,\n \"samples_ns\": [ 885520593, 887603103, 880869815 ],\n \"samples_ts\": [ 144.548, 144.209, 145.311 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:05:57Z\",\n \"avg_ns\": 12721240700,\n \"stddev_ns\": 6042335,\n \"avg_ts\": 40.247653,\n \"stddev_ts\": 0.019115,\n \"samples_ns\": [ 12714940819, 12726985004, 12721796278 ],\n \"samples_ts\": [ 40.2676, 40.2295, 40.2459 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:05:54Z", + "avg_ns": 884664503, + "stddev_ns": 3447568, + "avg_ts": 144.689085, + "stddev_ts": 0.564588, + "samples_ns": [ + 885520593, + 887603103, + 880869815 + ], + "samples_ts": [ + 144.548, + 144.209, + 145.311 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T21:05:57Z", + "avg_ns": 12721240700, + "stddev_ns": 6042335, + "avg_ts": 40.247653, + "stddev_ts": 0.019115, + "samples_ns": [ + 12714940819, + 12726985004, + 12721796278 + ], + "samples_ts": [ + 40.2676, + 40.2295, + 40.2459 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 57 + }, + { + "timestamp_utc": "2025-12-08T21:07:00.906566+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:06:36Z\",\n \"avg_ns\": 3736657334,\n \"stddev_ns\": 2802475,\n \"avg_ts\": 137.020911,\n \"stddev_ts\": 0.102673,\n \"samples_ns\": [ 3739881241, 3734819065, 3735271698 ],\n \"samples_ts\": [ 136.903, 137.088, 137.072 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:06:51Z\",\n \"avg_ns\": 3108939858,\n \"stddev_ns\": 794205,\n \"avg_ts\": 41.171593,\n \"stddev_ts\": 0.010519,\n \"samples_ns\": [ 3108041241, 3109547684, 3109230649 ],\n \"samples_ts\": [ 41.1835, 41.1635, 41.1677 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:06:36Z", + "avg_ns": 3736657334, + "stddev_ns": 2802475, + "avg_ts": 137.020911, + "stddev_ts": 0.102673, + "samples_ns": [ + 3739881241, + 3734819065, + 3735271698 + ], + "samples_ts": [ + 136.903, + 137.088, + 137.072 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T21:06:51Z", + "avg_ns": 3108939858, + "stddev_ns": 794205, + "avg_ts": 41.171593, + "stddev_ts": 0.010519, + "samples_ns": [ + 3108041241, + 3109547684, + 3109230649 + ], + "samples_ts": [ + 41.1835, + 41.1635, + 41.1677 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 58 + }, + { + "timestamp_utc": "2025-12-08T21:07:54.585535+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:07:01Z\",\n \"avg_ns\": 3735990562,\n \"stddev_ns\": 500280,\n \"avg_ts\": 137.045316,\n \"stddev_ts\": 0.018350,\n \"samples_ns\": [ 3735618550, 3736559303, 3735793833 ],\n \"samples_ts\": [ 137.059, 137.024, 137.053 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:07:16Z\",\n \"avg_ns\": 12650061080,\n \"stddev_ns\": 2816199,\n \"avg_ts\": 40.474114,\n \"stddev_ts\": 0.009010,\n \"samples_ns\": [ 12650066627, 12652874502, 12647242111 ],\n \"samples_ts\": [ 40.4741, 40.4651, 40.4831 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:07:01Z", + "avg_ns": 3735990562, + "stddev_ns": 500280, + "avg_ts": 137.045316, + "stddev_ts": 0.01835, + "samples_ns": [ + 3735618550, + 3736559303, + 3735793833 + ], + "samples_ts": [ + 137.059, + 137.024, + 137.053 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T21:07:16Z", + "avg_ns": 12650061080, + "stddev_ns": 2816199, + "avg_ts": 40.474114, + "stddev_ts": 0.00901, + "samples_ns": [ + 12650066627, + 12652874502, + 12647242111 + ], + "samples_ts": [ + 40.4741, + 40.4651, + 40.4831 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 59 + }, + { + "timestamp_utc": "2025-12-08T21:08:08.182976+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:07:55Z\",\n \"avg_ns\": 872312867,\n \"stddev_ns\": 131254,\n \"avg_ts\": 146.736345,\n \"stddev_ts\": 0.020929,\n \"samples_ns\": [ 872456494, 872237640, 872244469 ],\n \"samples_ts\": [ 146.712, 146.749, 146.748 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:07:58Z\",\n \"avg_ns\": 3111036868,\n \"stddev_ns\": 1387441,\n \"avg_ts\": 41.143844,\n \"stddev_ts\": 0.018330,\n \"samples_ns\": [ 3110390491, 3112628335, 3110091779 ],\n \"samples_ts\": [ 41.1524, 41.1228, 41.1563 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:07:55Z", + "avg_ns": 872312867, + "stddev_ns": 131254, + "avg_ts": 146.736345, + "stddev_ts": 0.020929, + "samples_ns": [ + 872456494, + 872237640, + 872244469 + ], + "samples_ts": [ + 146.712, + 146.749, + 146.748 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T21:07:58Z", + "avg_ns": 3111036868, + "stddev_ns": 1387441, + "avg_ts": 41.143844, + "stddev_ts": 0.01833, + "samples_ns": [ + 3110390491, + 3112628335, + 3110091779 + ], + "samples_ts": [ + 41.1524, + 41.1228, + 41.1563 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 60 + }, + { + "timestamp_utc": "2025-12-08T21:08:50.574556+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:08:08Z\",\n \"avg_ns\": 875840104,\n \"stddev_ns\": 144346,\n \"avg_ts\": 146.145400,\n \"stddev_ts\": 0.024088,\n \"samples_ns\": [ 875936047, 875674098, 875910167 ],\n \"samples_ts\": [ 146.129, 146.173, 146.134 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:08:12Z\",\n \"avg_ns\": 12688162445,\n \"stddev_ns\": 19177340,\n \"avg_ts\": 40.352634,\n \"stddev_ts\": 0.060950,\n \"samples_ns\": [ 12709718196, 12672993373, 12681775766 ],\n \"samples_ts\": [ 40.2841, 40.4009, 40.3729 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:08:08Z", + "avg_ns": 875840104, + "stddev_ns": 144346, + "avg_ts": 146.1454, + "stddev_ts": 0.024088, + "samples_ns": [ + 875936047, + 875674098, + 875910167 + ], + "samples_ts": [ + 146.129, + 146.173, + 146.134 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T21:08:12Z", + "avg_ns": 12688162445, + "stddev_ns": 19177340, + "avg_ts": 40.352634, + "stddev_ts": 0.06095, + "samples_ns": [ + 12709718196, + 12672993373, + 12681775766 + ], + "samples_ts": [ + 40.2841, + 40.4009, + 40.3729 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 61 + }, + { + "timestamp_utc": "2025-12-08T21:09:14.962951+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:08:51Z\",\n \"avg_ns\": 3571676174,\n \"stddev_ns\": 311759,\n \"avg_ts\": 143.350062,\n \"stddev_ts\": 0.012280,\n \"samples_ns\": [ 3572006356, 3571619976, 3571402191 ],\n \"samples_ts\": [ 143.337, 143.352, 143.361 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:09:05Z\",\n \"avg_ns\": 3108750307,\n \"stddev_ns\": 635723,\n \"avg_ts\": 41.174102,\n \"stddev_ts\": 0.008420,\n \"samples_ns\": [ 3109344204, 3108079715, 3108827002 ],\n \"samples_ts\": [ 41.1662, 41.183, 41.1731 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:08:51Z", + "avg_ns": 3571676174, + "stddev_ns": 311759, + "avg_ts": 143.350062, + "stddev_ts": 0.01228, + "samples_ns": [ + 3572006356, + 3571619976, + 3571402191 + ], + "samples_ts": [ + 143.337, + 143.352, + 143.361 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T21:09:05Z", + "avg_ns": 3108750307, + "stddev_ns": 635723, + "avg_ts": 41.174102, + "stddev_ts": 0.00842, + "samples_ns": [ + 3109344204, + 3108079715, + 3108827002 + ], + "samples_ts": [ + 41.1662, + 41.183, + 41.1731 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 62 + }, + { + "timestamp_utc": "2025-12-08T21:10:08.086768+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:09:15Z\",\n \"avg_ns\": 3582695655,\n \"stddev_ns\": 208851,\n \"avg_ts\": 142.909153,\n \"stddev_ts\": 0.007981,\n \"samples_ns\": [ 3582633809, 3582919367, 3582533790 ],\n \"samples_ts\": [ 142.912, 142.9, 142.916 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:09:29Z\",\n \"avg_ns\": 12662614054,\n \"stddev_ns\": 2243504,\n \"avg_ts\": 40.433990,\n \"stddev_ts\": 0.007155,\n \"samples_ns\": [ 12662966796, 12660217925, 12664657442 ],\n \"samples_ts\": [ 40.4329, 40.4416, 40.4275 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:09:15Z", + "avg_ns": 3582695655, + "stddev_ns": 208851, + "avg_ts": 142.909153, + "stddev_ts": 0.007981, + "samples_ns": [ + 3582633809, + 3582919367, + 3582533790 + ], + "samples_ts": [ + 142.912, + 142.9, + 142.916 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T21:09:29Z", + "avg_ns": 12662614054, + "stddev_ns": 2243504, + "avg_ts": 40.43399, + "stddev_ts": 0.007155, + "samples_ns": [ + 12662966796, + 12660217925, + 12664657442 + ], + "samples_ts": [ + 40.4329, + 40.4416, + 40.4275 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 63 + }, + { + "timestamp_utc": "2025-12-08T21:10:21.748304+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:10:08Z\",\n \"avg_ns\": 876510739,\n \"stddev_ns\": 345111,\n \"avg_ts\": 146.033593,\n \"stddev_ts\": 0.057086,\n \"samples_ns\": [ 876710587, 876115190, 876706442 ],\n \"samples_ts\": [ 146, 146.1, 146.001 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:10:12Z\",\n \"avg_ns\": 3126004096,\n \"stddev_ns\": 10277114,\n \"avg_ts\": 40.947139,\n \"stddev_ts\": 0.134857,\n \"samples_ns\": [ 3133044153, 3130756978, 3114211159 ],\n \"samples_ts\": [ 40.8548, 40.8847, 41.1019 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:10:08Z", + "avg_ns": 876510739, + "stddev_ns": 345111, + "avg_ts": 146.033593, + "stddev_ts": 0.057086, + "samples_ns": [ + 876710587, + 876115190, + 876706442 + ], + "samples_ts": [ + 146, + 146.1, + 146.001 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T21:10:12Z", + "avg_ns": 3126004096, + "stddev_ns": 10277114, + "avg_ts": 40.947139, + "stddev_ts": 0.134857, + "samples_ns": [ + 3133044153, + 3130756978, + 3114211159 + ], + "samples_ts": [ + 40.8548, + 40.8847, + 41.1019 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 64 + }, + { + "timestamp_utc": "2025-12-08T21:11:04.127690+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:10:22Z\",\n \"avg_ns\": 872255449,\n \"stddev_ns\": 180355,\n \"avg_ts\": 146.746006,\n \"stddev_ts\": 0.029935,\n \"samples_ns\": [ 872308623, 872057004, 872400721 ],\n \"samples_ts\": [ 146.737, 146.779, 146.722 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:10:25Z\",\n \"avg_ns\": 12705605264,\n \"stddev_ns\": 10038963,\n \"avg_ts\": 40.297192,\n \"stddev_ts\": 0.031845,\n \"samples_ns\": [ 12694492867, 12708308161, 12714014766 ],\n \"samples_ts\": [ 40.3325, 40.2886, 40.2705 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:10:22Z", + "avg_ns": 872255449, + "stddev_ns": 180355, + "avg_ts": 146.746006, + "stddev_ts": 0.029935, + "samples_ns": [ + 872308623, + 872057004, + 872400721 + ], + "samples_ts": [ + 146.737, + 146.779, + 146.722 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T21:10:25Z", + "avg_ns": 12705605264, + "stddev_ns": 10038963, + "avg_ts": 40.297192, + "stddev_ts": 0.031845, + "samples_ns": [ + 12694492867, + 12708308161, + 12714014766 + ], + "samples_ts": [ + 40.3325, + 40.2886, + 40.2705 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 65 + }, + { + "timestamp_utc": "2025-12-08T21:11:28.728680+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:11:04Z\",\n \"avg_ns\": 3622558167,\n \"stddev_ns\": 83399,\n \"avg_ts\": 141.336585,\n \"stddev_ts\": 0.002252,\n \"samples_ns\": [ 3622546818, 3622620731, 3622506953 ],\n \"samples_ts\": [ 141.337, 141.334, 141.339 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:11:19Z\",\n \"avg_ns\": 3109899267,\n \"stddev_ns\": 2385093,\n \"avg_ts\": 41.158906,\n \"stddev_ts\": 0.031544,\n \"samples_ns\": [ 3108411457, 3108636839, 3112649506 ],\n \"samples_ts\": [ 41.1786, 41.1756, 41.1225 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:11:04Z", + "avg_ns": 3622558167, + "stddev_ns": 83399, + "avg_ts": 141.336585, + "stddev_ts": 0.002252, + "samples_ns": [ + 3622546818, + 3622620731, + 3622506953 + ], + "samples_ts": [ + 141.337, + 141.334, + 141.339 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T21:11:19Z", + "avg_ns": 3109899267, + "stddev_ns": 2385093, + "avg_ts": 41.158906, + "stddev_ts": 0.031544, + "samples_ns": [ + 3108411457, + 3108636839, + 3112649506 + ], + "samples_ts": [ + 41.1786, + 41.1756, + 41.1225 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 66 + }, + { + "timestamp_utc": "2025-12-08T21:12:21.913621+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:11:29Z\",\n \"avg_ns\": 3622339954,\n \"stddev_ns\": 144719,\n \"avg_ts\": 141.345099,\n \"stddev_ts\": 0.005136,\n \"samples_ns\": [ 3622200245, 3622358016, 3622461602 ],\n \"samples_ts\": [ 141.351, 141.344, 141.34 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:11:43Z\",\n \"avg_ns\": 12630472157,\n \"stddev_ns\": 9341214,\n \"avg_ts\": 40.536900,\n \"stddev_ts\": 0.029978,\n \"samples_ns\": [ 12640097010, 12629876357, 12621443104 ],\n \"samples_ts\": [ 40.506, 40.5388, 40.5659 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:11:29Z", + "avg_ns": 3622339954, + "stddev_ns": 144719, + "avg_ts": 141.345099, + "stddev_ts": 0.005136, + "samples_ns": [ + 3622200245, + 3622358016, + 3622461602 + ], + "samples_ts": [ + 141.351, + 141.344, + 141.34 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T21:11:43Z", + "avg_ns": 12630472157, + "stddev_ns": 9341214, + "avg_ts": 40.5369, + "stddev_ts": 0.029978, + "samples_ns": [ + 12640097010, + 12629876357, + 12621443104 + ], + "samples_ts": [ + 40.506, + 40.5388, + 40.5659 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 67 + }, + { + "timestamp_utc": "2025-12-08T21:12:35.648910+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:12:22Z\",\n \"avg_ns\": 885435373,\n \"stddev_ns\": 802871,\n \"avg_ts\": 144.561731,\n \"stddev_ts\": 0.130961,\n \"samples_ns\": [ 885750872, 886031400, 884523849 ],\n \"samples_ts\": [ 144.51, 144.464, 144.711 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:12:26Z\",\n \"avg_ns\": 3137697631,\n \"stddev_ns\": 1091905,\n \"avg_ts\": 40.794246,\n \"stddev_ts\": 0.014161,\n \"samples_ns\": [ 3138536599, 3136466925, 3138089371 ],\n \"samples_ts\": [ 40.7833, 40.8103, 40.7892 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:12:22Z", + "avg_ns": 885435373, + "stddev_ns": 802871, + "avg_ts": 144.561731, + "stddev_ts": 0.130961, + "samples_ns": [ + 885750872, + 886031400, + 884523849 + ], + "samples_ts": [ + 144.51, + 144.464, + 144.711 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T21:12:26Z", + "avg_ns": 3137697631, + "stddev_ns": 1091905, + "avg_ts": 40.794246, + "stddev_ts": 0.014161, + "samples_ns": [ + 3138536599, + 3136466925, + 3138089371 + ], + "samples_ts": [ + 40.7833, + 40.8103, + 40.7892 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 68 + }, + { + "timestamp_utc": "2025-12-08T21:13:18.400899+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:12:36Z\",\n \"avg_ns\": 885055514,\n \"stddev_ns\": 613290,\n \"avg_ts\": 144.623743,\n \"stddev_ts\": 0.100101,\n \"samples_ns\": [ 884433707, 885658405, 885074431 ],\n \"samples_ts\": [ 144.725, 144.525, 144.621 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:12:39Z\",\n \"avg_ns\": 12795955941,\n \"stddev_ns\": 8029115,\n \"avg_ts\": 40.012652,\n \"stddev_ts\": 0.025116,\n \"samples_ns\": [ 12786711823, 12801191525, 12799964475 ],\n \"samples_ts\": [ 40.0416, 39.9963, 40.0001 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:12:36Z", + "avg_ns": 885055514, + "stddev_ns": 613290, + "avg_ts": 144.623743, + "stddev_ts": 0.100101, + "samples_ns": [ + 884433707, + 885658405, + 885074431 + ], + "samples_ts": [ + 144.725, + 144.525, + 144.621 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T21:12:39Z", + "avg_ns": 12795955941, + "stddev_ns": 8029115, + "avg_ts": 40.012652, + "stddev_ts": 0.025116, + "samples_ns": [ + 12786711823, + 12801191525, + 12799964475 + ], + "samples_ts": [ + 40.0416, + 39.9963, + 40.0001 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 69 + }, + { + "timestamp_utc": "2025-12-08T21:13:43.743917+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:13:19Z\",\n \"avg_ns\": 3804577426,\n \"stddev_ns\": 769205,\n \"avg_ts\": 134.574739,\n \"stddev_ts\": 0.027123,\n \"samples_ns\": [ 3803704589, 3805142256, 3804885434 ],\n \"samples_ts\": [ 134.606, 134.555, 134.564 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:13:34Z\",\n \"avg_ns\": 3115112278,\n \"stddev_ns\": 668820,\n \"avg_ts\": 41.090013,\n \"stddev_ts\": 0.008791,\n \"samples_ns\": [ 3114467361, 3115798423, 3115071051 ],\n \"samples_ts\": [ 41.0985, 41.081, 41.0906 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:13:19Z", + "avg_ns": 3804577426, + "stddev_ns": 769205, + "avg_ts": 134.574739, + "stddev_ts": 0.027123, + "samples_ns": [ + 3803704589, + 3805142256, + 3804885434 + ], + "samples_ts": [ + 134.606, + 134.555, + 134.564 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T21:13:34Z", + "avg_ns": 3115112278, + "stddev_ns": 668820, + "avg_ts": 41.090013, + "stddev_ts": 0.008791, + "samples_ns": [ + 3114467361, + 3115798423, + 3115071051 + ], + "samples_ts": [ + 41.0985, + 41.081, + 41.0906 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 70 + }, + { + "timestamp_utc": "2025-12-08T21:14:37.398025+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:13:44Z\",\n \"avg_ns\": 3741853247,\n \"stddev_ns\": 586206,\n \"avg_ts\": 136.830596,\n \"stddev_ts\": 0.021202,\n \"samples_ns\": [ 3742385518, 3741938780, 3741235445 ],\n \"samples_ts\": [ 136.811, 136.827, 136.853 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:13:59Z\",\n \"avg_ns\": 12633606479,\n \"stddev_ns\": 3742838,\n \"avg_ts\": 40.526831,\n \"stddev_ts\": 0.011994,\n \"samples_ns\": [ 12631202965, 12637914829, 12631701645 ],\n \"samples_ts\": [ 40.5345, 40.513, 40.5329 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:13:44Z", + "avg_ns": 3741853247, + "stddev_ns": 586206, + "avg_ts": 136.830596, + "stddev_ts": 0.021202, + "samples_ns": [ + 3742385518, + 3741938780, + 3741235445 + ], + "samples_ts": [ + 136.811, + 136.827, + 136.853 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T21:13:59Z", + "avg_ns": 12633606479, + "stddev_ns": 3742838, + "avg_ts": 40.526831, + "stddev_ts": 0.011994, + "samples_ns": [ + 12631202965, + 12637914829, + 12631701645 + ], + "samples_ts": [ + 40.5345, + 40.513, + 40.5329 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 71 + }, + { + "timestamp_utc": "2025-12-08T21:14:49.748688+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:14:38Z\",\n \"avg_ns\": 604979118,\n \"stddev_ns\": 878791,\n \"avg_ts\": 211.577848,\n \"stddev_ts\": 0.307087,\n \"samples_ns\": [ 605990871, 604540629, 604405854 ],\n \"samples_ts\": [ 211.224, 211.731, 211.778 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:14:40Z\",\n \"avg_ns\": 3053405466,\n \"stddev_ns\": 1737037,\n \"avg_ts\": 41.920416,\n \"stddev_ts\": 0.023844,\n \"samples_ns\": [ 3051402095, 3054471353, 3054342951 ],\n \"samples_ts\": [ 41.9479, 41.9058, 41.9075 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:14:38Z", + "avg_ns": 604979118, + "stddev_ns": 878791, + "avg_ts": 211.577848, + "stddev_ts": 0.307087, + "samples_ns": [ + 605990871, + 604540629, + 604405854 + ], + "samples_ts": [ + 211.224, + 211.731, + 211.778 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T21:14:40Z", + "avg_ns": 3053405466, + "stddev_ns": 1737037, + "avg_ts": 41.920416, + "stddev_ts": 0.023844, + "samples_ns": [ + 3051402095, + 3054471353, + 3054342951 + ], + "samples_ts": [ + 41.9479, + 41.9058, + 41.9075 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 72 + }, + { + "timestamp_utc": "2025-12-08T21:15:30.562649+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:14:50Z\",\n \"avg_ns\": 605426653,\n \"stddev_ns\": 506728,\n \"avg_ts\": 211.421250,\n \"stddev_ts\": 0.176908,\n \"samples_ns\": [ 605314082, 604985676, 605980201 ],\n \"samples_ts\": [ 211.46, 211.575, 211.228 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:14:52Z\",\n \"avg_ns\": 12522859582,\n \"stddev_ns\": 12067091,\n \"avg_ts\": 40.885256,\n \"stddev_ts\": 0.039419,\n \"samples_ns\": [ 12508934760, 12529387012, 12530256974 ],\n \"samples_ts\": [ 40.9307, 40.8639, 40.8611 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:14:50Z", + "avg_ns": 605426653, + "stddev_ns": 506728, + "avg_ts": 211.42125, + "stddev_ts": 0.176908, + "samples_ns": [ + 605314082, + 604985676, + 605980201 + ], + "samples_ts": [ + 211.46, + 211.575, + 211.228 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T21:14:52Z", + "avg_ns": 12522859582, + "stddev_ns": 12067091, + "avg_ts": 40.885256, + "stddev_ts": 0.039419, + "samples_ns": [ + 12508934760, + 12529387012, + 12530256974 + ], + "samples_ts": [ + 40.9307, + 40.8639, + 40.8611 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 73 + }, + { + "timestamp_utc": "2025-12-08T21:15:50.464061+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:15:31Z\",\n \"avg_ns\": 2466743898,\n \"stddev_ns\": 174333,\n \"avg_ts\": 207.561069,\n \"stddev_ts\": 0.014061,\n \"samples_ns\": [ 2466560036, 2466785121, 2466886538 ],\n \"samples_ts\": [ 207.577, 207.558, 207.549 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:15:41Z\",\n \"avg_ns\": 3063916790,\n \"stddev_ns\": 1050866,\n \"avg_ts\": 41.776595,\n \"stddev_ts\": 0.014309,\n \"samples_ns\": [ 3062874511, 3064973184, 3063902676 ],\n \"samples_ts\": [ 41.7908, 41.7622, 41.7768 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:15:31Z", + "avg_ns": 2466743898, + "stddev_ns": 174333, + "avg_ts": 207.561069, + "stddev_ts": 0.014061, + "samples_ns": [ + 2466560036, + 2466785121, + 2466886538 + ], + "samples_ts": [ + 207.577, + 207.558, + 207.549 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T21:15:41Z", + "avg_ns": 3063916790, + "stddev_ns": 1050866, + "avg_ts": 41.776595, + "stddev_ts": 0.014309, + "samples_ns": [ + 3062874511, + 3064973184, + 3063902676 + ], + "samples_ts": [ + 41.7908, + 41.7622, + 41.7768 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 74 + }, + { + "timestamp_utc": "2025-12-08T21:16:38.376603+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:15:51Z\",\n \"avg_ns\": 2461830634,\n \"stddev_ns\": 1647624,\n \"avg_ts\": 207.975376,\n \"stddev_ts\": 0.139180,\n \"samples_ns\": [ 2462652667, 2462904677, 2459934559 ],\n \"samples_ts\": [ 207.906, 207.885, 208.136 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:16:00Z\",\n \"avg_ns\": 12427884086,\n \"stddev_ns\": 1725467,\n \"avg_ts\": 41.197681,\n \"stddev_ts\": 0.005720,\n \"samples_ns\": [ 12425921764, 12429163921, 12428566573 ],\n \"samples_ts\": [ 41.2042, 41.1934, 41.1954 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:15:51Z", + "avg_ns": 2461830634, + "stddev_ns": 1647624, + "avg_ts": 207.975376, + "stddev_ts": 0.13918, + "samples_ns": [ + 2462652667, + 2462904677, + 2459934559 + ], + "samples_ts": [ + 207.906, + 207.885, + 208.136 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T21:16:00Z", + "avg_ns": 12427884086, + "stddev_ns": 1725467, + "avg_ts": 41.197681, + "stddev_ts": 0.00572, + "samples_ns": [ + 12425921764, + 12429163921, + 12428566573 + ], + "samples_ts": [ + 41.2042, + 41.1934, + 41.1954 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 75 + }, + { + "timestamp_utc": "2025-12-08T21:16:50.772429+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:16:39Z\",\n \"avg_ns\": 604888728,\n \"stddev_ns\": 253282,\n \"avg_ts\": 211.609192,\n \"stddev_ts\": 0.088626,\n \"samples_ns\": [ 605067319, 604598856, 605000009 ],\n \"samples_ts\": [ 211.547, 211.711, 211.57 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:16:41Z\",\n \"avg_ns\": 3059335160,\n \"stddev_ns\": 895217,\n \"avg_ts\": 41.839158,\n \"stddev_ts\": 0.012241,\n \"samples_ns\": [ 3058982670, 3058669842, 3060352968 ],\n \"samples_ts\": [ 41.844, 41.8483, 41.8252 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:16:39Z", + "avg_ns": 604888728, + "stddev_ns": 253282, + "avg_ts": 211.609192, + "stddev_ts": 0.088626, + "samples_ns": [ + 605067319, + 604598856, + 605000009 + ], + "samples_ts": [ + 211.547, + 211.711, + 211.57 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T21:16:41Z", + "avg_ns": 3059335160, + "stddev_ns": 895217, + "avg_ts": 41.839158, + "stddev_ts": 0.012241, + "samples_ns": [ + 3058982670, + 3058669842, + 3060352968 + ], + "samples_ts": [ + 41.844, + 41.8483, + 41.8252 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 76 + }, + { + "timestamp_utc": "2025-12-08T21:17:31.508711+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:16:51Z\",\n \"avg_ns\": 604797851,\n \"stddev_ns\": 189839,\n \"avg_ts\": 211.640977,\n \"stddev_ts\": 0.065298,\n \"samples_ns\": [ 604731812, 605008519, 604653224 ],\n \"samples_ts\": [ 211.664, 211.567, 211.692 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:16:53Z\",\n \"avg_ns\": 12497122656,\n \"stddev_ns\": 12062261,\n \"avg_ts\": 40.969456,\n \"stddev_ts\": 0.039547,\n \"samples_ns\": [ 12508672071, 12484606738, 12498089160 ],\n \"samples_ts\": [ 40.9316, 41.0105, 40.9663 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:16:51Z", + "avg_ns": 604797851, + "stddev_ns": 189839, + "avg_ts": 211.640977, + "stddev_ts": 0.065298, + "samples_ns": [ + 604731812, + 605008519, + 604653224 + ], + "samples_ts": [ + 211.664, + 211.567, + 211.692 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T21:16:53Z", + "avg_ns": 12497122656, + "stddev_ns": 12062261, + "avg_ts": 40.969456, + "stddev_ts": 0.039547, + "samples_ns": [ + 12508672071, + 12484606738, + 12498089160 + ], + "samples_ts": [ + 40.9316, + 41.0105, + 40.9663 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 77 + }, + { + "timestamp_utc": "2025-12-08T21:17:51.424088+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:17:32Z\",\n \"avg_ns\": 2497965508,\n \"stddev_ns\": 804971,\n \"avg_ts\": 204.966815,\n \"stddev_ts\": 0.065795,\n \"samples_ns\": [ 2498778641, 2497942472, 2497175413 ],\n \"samples_ts\": [ 204.9, 204.969, 205.032 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:17:42Z\",\n \"avg_ns\": 3050256925,\n \"stddev_ns\": 1983335,\n \"avg_ts\": 41.963690,\n \"stddev_ts\": 0.027290,\n \"samples_ns\": [ 3050631995, 3048112834, 3052025946 ],\n \"samples_ts\": [ 41.9585, 41.9932, 41.9394 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:17:32Z", + "avg_ns": 2497965508, + "stddev_ns": 804971, + "avg_ts": 204.966815, + "stddev_ts": 0.065795, + "samples_ns": [ + 2498778641, + 2497942472, + 2497175413 + ], + "samples_ts": [ + 204.9, + 204.969, + 205.032 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T21:17:42Z", + "avg_ns": 3050256925, + "stddev_ns": 1983335, + "avg_ts": 41.96369, + "stddev_ts": 0.02729, + "samples_ns": [ + 3050631995, + 3048112834, + 3052025946 + ], + "samples_ts": [ + 41.9585, + 41.9932, + 41.9394 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 78 + }, + { + "timestamp_utc": "2025-12-08T21:18:39.621832+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:17:52Z\",\n \"avg_ns\": 2527682721,\n \"stddev_ns\": 865987,\n \"avg_ts\": 202.557084,\n \"stddev_ts\": 0.069407,\n \"samples_ns\": [ 2528378798, 2527956408, 2526712957 ],\n \"samples_ts\": [ 202.501, 202.535, 202.635 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:18:02Z\",\n \"avg_ns\": 12434215959,\n \"stddev_ns\": 9200896,\n \"avg_ts\": 41.176717,\n \"stddev_ts\": 0.030475,\n \"samples_ns\": [ 12423892402, 12441545629, 12437209848 ],\n \"samples_ts\": [ 41.2109, 41.1524, 41.1668 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:17:52Z", + "avg_ns": 2527682721, + "stddev_ns": 865987, + "avg_ts": 202.557084, + "stddev_ts": 0.069407, + "samples_ns": [ + 2528378798, + 2527956408, + 2526712957 + ], + "samples_ts": [ + 202.501, + 202.535, + 202.635 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T21:18:02Z", + "avg_ns": 12434215959, + "stddev_ns": 9200896, + "avg_ts": 41.176717, + "stddev_ts": 0.030475, + "samples_ns": [ + 12423892402, + 12441545629, + 12437209848 + ], + "samples_ts": [ + 41.2109, + 41.1524, + 41.1668 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 79 + }, + { + "timestamp_utc": "2025-12-08T21:18:52.048323+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:18:40Z\",\n \"avg_ns\": 605447705,\n \"stddev_ns\": 168232,\n \"avg_ts\": 211.413811,\n \"stddev_ts\": 0.058119,\n \"samples_ns\": [ 605585155, 605262673, 605495288 ],\n \"samples_ts\": [ 211.366, 211.478, 211.397 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:18:42Z\",\n \"avg_ns\": 3069277971,\n \"stddev_ns\": 1286599,\n \"avg_ts\": 41.703624,\n \"stddev_ts\": 0.017481,\n \"samples_ns\": [ 3069184180, 3068040834, 3070608899 ],\n \"samples_ts\": [ 41.7049, 41.7204, 41.6855 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:18:40Z", + "avg_ns": 605447705, + "stddev_ns": 168232, + "avg_ts": 211.413811, + "stddev_ts": 0.058119, + "samples_ns": [ + 605585155, + 605262673, + 605495288 + ], + "samples_ts": [ + 211.366, + 211.478, + 211.397 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T21:18:42Z", + "avg_ns": 3069277971, + "stddev_ns": 1286599, + "avg_ts": 41.703624, + "stddev_ts": 0.017481, + "samples_ns": [ + 3069184180, + 3068040834, + 3070608899 + ], + "samples_ts": [ + 41.7049, + 41.7204, + 41.6855 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 80 + }, + { + "timestamp_utc": "2025-12-08T21:19:32.713715+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:18:52Z\",\n \"avg_ns\": 603832063,\n \"stddev_ns\": 286544,\n \"avg_ts\": 211.979501,\n \"stddev_ts\": 0.100620,\n \"samples_ns\": [ 603503109, 603965724, 604027356 ],\n \"samples_ts\": [ 212.095, 211.933, 211.911 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:18:55Z\",\n \"avg_ns\": 12489044123,\n \"stddev_ns\": 7020694,\n \"avg_ts\": 40.995940,\n \"stddev_ts\": 0.023033,\n \"samples_ns\": [ 12485286950, 12497141867, 12484703554 ],\n \"samples_ts\": [ 41.0083, 40.9694, 41.0102 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:18:52Z", + "avg_ns": 603832063, + "stddev_ns": 286544, + "avg_ts": 211.979501, + "stddev_ts": 0.10062, + "samples_ns": [ + 603503109, + 603965724, + 604027356 + ], + "samples_ts": [ + 212.095, + 211.933, + 211.911 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T21:18:55Z", + "avg_ns": 12489044123, + "stddev_ns": 7020694, + "avg_ts": 40.99594, + "stddev_ts": 0.023033, + "samples_ns": [ + 12485286950, + 12497141867, + 12484703554 + ], + "samples_ts": [ + 41.0083, + 40.9694, + 41.0102 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 81 + }, + { + "timestamp_utc": "2025-12-08T21:19:53.233600+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:19:33Z\",\n \"avg_ns\": 2620157780,\n \"stddev_ns\": 2283346,\n \"avg_ts\": 195.408178,\n \"stddev_ts\": 0.170225,\n \"samples_ns\": [ 2617774253, 2620376134, 2622322955 ],\n \"samples_ts\": [ 195.586, 195.392, 195.247 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:19:43Z\",\n \"avg_ns\": 3071141507,\n \"stddev_ns\": 2251017,\n \"avg_ts\": 41.678329,\n \"stddev_ts\": 0.030558,\n \"samples_ns\": [ 3071867955, 3072939598, 3068616968 ],\n \"samples_ts\": [ 41.6685, 41.6539, 41.7126 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:19:33Z", + "avg_ns": 2620157780, + "stddev_ns": 2283346, + "avg_ts": 195.408178, + "stddev_ts": 0.170225, + "samples_ns": [ + 2617774253, + 2620376134, + 2622322955 + ], + "samples_ts": [ + 195.586, + 195.392, + 195.247 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T21:19:43Z", + "avg_ns": 3071141507, + "stddev_ns": 2251017, + "avg_ts": 41.678329, + "stddev_ts": 0.030558, + "samples_ns": [ + 3071867955, + 3072939598, + 3068616968 + ], + "samples_ts": [ + 41.6685, + 41.6539, + 41.7126 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 82 + }, + { + "timestamp_utc": "2025-12-08T21:20:42.034171+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:19:53Z\",\n \"avg_ns\": 2671329519,\n \"stddev_ns\": 806857,\n \"avg_ts\": 191.664872,\n \"stddev_ts\": 0.057782,\n \"samples_ns\": [ 2670400055, 2671814744, 2671773759 ],\n \"samples_ts\": [ 191.732, 191.63, 191.633 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:20:04Z\",\n \"avg_ns\": 12444329516,\n \"stddev_ns\": 7041229,\n \"avg_ts\": 41.143246,\n \"stddev_ts\": 0.023276,\n \"samples_ns\": [ 12451916118, 12443068311, 12438004119 ],\n \"samples_ts\": [ 41.1182, 41.1474, 41.1642 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:19:53Z", + "avg_ns": 2671329519, + "stddev_ns": 806857, + "avg_ts": 191.664872, + "stddev_ts": 0.057782, + "samples_ns": [ + 2670400055, + 2671814744, + 2671773759 + ], + "samples_ts": [ + 191.732, + 191.63, + 191.633 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T21:20:04Z", + "avg_ns": 12444329516, + "stddev_ns": 7041229, + "avg_ts": 41.143246, + "stddev_ts": 0.023276, + "samples_ns": [ + 12451916118, + 12443068311, + 12438004119 + ], + "samples_ts": [ + 41.1182, + 41.1474, + 41.1642 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 83 + }, + { + "timestamp_utc": "2025-12-08T21:20:54.501854+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:20:42Z\",\n \"avg_ns\": 606345730,\n \"stddev_ns\": 1762075,\n \"avg_ts\": 211.101873,\n \"stddev_ts\": 0.612700,\n \"samples_ns\": [ 604863022, 605880901, 608293269 ],\n \"samples_ts\": [ 211.618, 211.263, 210.425 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:20:45Z\",\n \"avg_ns\": 3063936945,\n \"stddev_ns\": 3651070,\n \"avg_ts\": 41.776356,\n \"stddev_ts\": 0.049736,\n \"samples_ns\": [ 3061994623, 3068147663, 3061668551 ],\n \"samples_ts\": [ 41.8028, 41.719, 41.8073 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:20:42Z", + "avg_ns": 606345730, + "stddev_ns": 1762075, + "avg_ts": 211.101873, + "stddev_ts": 0.6127, + "samples_ns": [ + 604863022, + 605880901, + 608293269 + ], + "samples_ts": [ + 211.618, + 211.263, + 210.425 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T21:20:45Z", + "avg_ns": 3063936945, + "stddev_ns": 3651070, + "avg_ts": 41.776356, + "stddev_ts": 0.049736, + "samples_ns": [ + 3061994623, + 3068147663, + 3061668551 + ], + "samples_ts": [ + 41.8028, + 41.719, + 41.8073 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 84 + }, + { + "timestamp_utc": "2025-12-08T21:21:35.090230+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:20:55Z\",\n \"avg_ns\": 604914814,\n \"stddev_ns\": 284992,\n \"avg_ts\": 211.600073,\n \"stddev_ts\": 0.098971,\n \"samples_ns\": [ 605047518, 605106926, 604590000 ],\n \"samples_ts\": [ 211.554, 211.533, 211.714 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:20:57Z\",\n \"avg_ns\": 12463677755,\n \"stddev_ns\": 2385281,\n \"avg_ts\": 41.079369,\n \"stddev_ts\": 0.007844,\n \"samples_ns\": [ 12466421401, 12462443593, 12462168273 ],\n \"samples_ts\": [ 41.0703, 41.0834, 41.0843 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:20:55Z", + "avg_ns": 604914814, + "stddev_ns": 284992, + "avg_ts": 211.600073, + "stddev_ts": 0.098971, + "samples_ns": [ + 605047518, + 605106926, + 604590000 + ], + "samples_ts": [ + 211.554, + 211.533, + 211.714 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T21:20:57Z", + "avg_ns": 12463677755, + "stddev_ns": 2385281, + "avg_ts": 41.079369, + "stddev_ts": 0.007844, + "samples_ns": [ + 12466421401, + 12462443593, + 12462168273 + ], + "samples_ts": [ + 41.0703, + 41.0834, + 41.0843 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 85 + }, + { + "timestamp_utc": "2025-12-08T21:21:54.907579+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:21:35Z\",\n \"avg_ns\": 2459432071,\n \"stddev_ns\": 265340,\n \"avg_ts\": 208.178144,\n \"stddev_ts\": 0.021660,\n \"samples_ns\": [ 2459724810, 2459250848, 2459320557 ],\n \"samples_ts\": [ 208.153, 208.193, 208.188 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:21:45Z\",\n \"avg_ns\": 3064927655,\n \"stddev_ns\": 1850385,\n \"avg_ts\": 41.762823,\n \"stddev_ts\": 0.025210,\n \"samples_ns\": [ 3062823416, 3066295991, 3065663559 ],\n \"samples_ts\": [ 41.7915, 41.7442, 41.7528 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:21:35Z", + "avg_ns": 2459432071, + "stddev_ns": 265340, + "avg_ts": 208.178144, + "stddev_ts": 0.02166, + "samples_ns": [ + 2459724810, + 2459250848, + 2459320557 + ], + "samples_ts": [ + 208.153, + 208.193, + 208.188 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T21:21:45Z", + "avg_ns": 3064927655, + "stddev_ns": 1850385, + "avg_ts": 41.762823, + "stddev_ts": 0.02521, + "samples_ns": [ + 3062823416, + 3066295991, + 3065663559 + ], + "samples_ts": [ + 41.7915, + 41.7442, + 41.7528 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 86 + }, + { + "timestamp_utc": "2025-12-08T21:22:42.926788+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:21:55Z\",\n \"avg_ns\": 2468732848,\n \"stddev_ns\": 627355,\n \"avg_ts\": 207.393855,\n \"stddev_ts\": 0.052364,\n \"samples_ns\": [ 2468406103, 2468340735, 2469451708 ],\n \"samples_ts\": [ 207.421, 207.427, 207.333 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:22:05Z\",\n \"avg_ns\": 12454547854,\n \"stddev_ns\": 4477551,\n \"avg_ts\": 41.109485,\n \"stddev_ts\": 0.014777,\n \"samples_ns\": [ 12452772549, 12459640823, 12451230190 ],\n \"samples_ts\": [ 41.1153, 41.0927, 41.1204 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:21:55Z", + "avg_ns": 2468732848, + "stddev_ns": 627355, + "avg_ts": 207.393855, + "stddev_ts": 0.052364, + "samples_ns": [ + 2468406103, + 2468340735, + 2469451708 + ], + "samples_ts": [ + 207.421, + 207.427, + 207.333 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T21:22:05Z", + "avg_ns": 12454547854, + "stddev_ns": 4477551, + "avg_ts": 41.109485, + "stddev_ts": 0.014777, + "samples_ns": [ + 12452772549, + 12459640823, + 12451230190 + ], + "samples_ts": [ + 41.1153, + 41.0927, + 41.1204 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 87 + }, + { + "timestamp_utc": "2025-12-08T21:22:55.307421+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:22:43Z\",\n \"avg_ns\": 605140850,\n \"stddev_ns\": 188497,\n \"avg_ts\": 211.521017,\n \"stddev_ts\": 0.065883,\n \"samples_ns\": [ 605340486, 605116134, 604965930 ],\n \"samples_ts\": [ 211.451, 211.53, 211.582 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:22:46Z\",\n \"avg_ns\": 3052521802,\n \"stddev_ns\": 2957533,\n \"avg_ts\": 41.932569,\n \"stddev_ts\": 0.040628,\n \"samples_ns\": [ 3049394072, 3055271808, 3052899527 ],\n \"samples_ts\": [ 41.9756, 41.8948, 41.9274 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:22:43Z", + "avg_ns": 605140850, + "stddev_ns": 188497, + "avg_ts": 211.521017, + "stddev_ts": 0.065883, + "samples_ns": [ + 605340486, + 605116134, + 604965930 + ], + "samples_ts": [ + 211.451, + 211.53, + 211.582 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T21:22:46Z", + "avg_ns": 3052521802, + "stddev_ns": 2957533, + "avg_ts": 41.932569, + "stddev_ts": 0.040628, + "samples_ns": [ + 3049394072, + 3055271808, + 3052899527 + ], + "samples_ts": [ + 41.9756, + 41.8948, + 41.9274 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 88 + }, + { + "timestamp_utc": "2025-12-08T21:23:35.982771+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:22:55Z\",\n \"avg_ns\": 605921966,\n \"stddev_ns\": 333598,\n \"avg_ts\": 211.248367,\n \"stddev_ts\": 0.115982,\n \"samples_ns\": [ 606266461, 605602495, 605896943 ],\n \"samples_ts\": [ 211.128, 211.36, 211.257 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:22:58Z\",\n \"avg_ns\": 12483956811,\n \"stddev_ns\": 4566686,\n \"avg_ts\": 41.012642,\n \"stddev_ts\": 0.014996,\n \"samples_ns\": [ 12487825230, 12478923221, 12485121984 ],\n \"samples_ts\": [ 40.9999, 41.0292, 41.0088 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:22:55Z", + "avg_ns": 605921966, + "stddev_ns": 333598, + "avg_ts": 211.248367, + "stddev_ts": 0.115982, + "samples_ns": [ + 606266461, + 605602495, + 605896943 + ], + "samples_ts": [ + 211.128, + 211.36, + 211.257 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T21:22:58Z", + "avg_ns": 12483956811, + "stddev_ns": 4566686, + "avg_ts": 41.012642, + "stddev_ts": 0.014996, + "samples_ns": [ + 12487825230, + 12478923221, + 12485121984 + ], + "samples_ts": [ + 40.9999, + 41.0292, + 41.0088 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 89 + }, + { + "timestamp_utc": "2025-12-08T21:23:55.875034+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:23:36Z\",\n \"avg_ns\": 2487597648,\n \"stddev_ns\": 791717,\n \"avg_ts\": 205.821080,\n \"stddev_ts\": 0.065502,\n \"samples_ns\": [ 2488438284, 2487488480, 2486866180 ],\n \"samples_ts\": [ 205.752, 205.83, 205.882 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:23:46Z\",\n \"avg_ns\": 3053552136,\n \"stddev_ns\": 2331472,\n \"avg_ts\": 41.918410,\n \"stddev_ts\": 0.031996,\n \"samples_ns\": [ 3051250513, 3055911091, 3053494805 ],\n \"samples_ts\": [ 41.95, 41.886, 41.9192 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:23:36Z", + "avg_ns": 2487597648, + "stddev_ns": 791717, + "avg_ts": 205.82108, + "stddev_ts": 0.065502, + "samples_ns": [ + 2488438284, + 2487488480, + 2486866180 + ], + "samples_ts": [ + 205.752, + 205.83, + 205.882 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T21:23:46Z", + "avg_ns": 3053552136, + "stddev_ns": 2331472, + "avg_ts": 41.91841, + "stddev_ts": 0.031996, + "samples_ns": [ + 3051250513, + 3055911091, + 3053494805 + ], + "samples_ts": [ + 41.95, + 41.886, + 41.9192 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 90 + }, + { + "timestamp_utc": "2025-12-08T21:24:44.049778+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:23:56Z\",\n \"avg_ns\": 2526881910,\n \"stddev_ns\": 833221,\n \"avg_ts\": 202.621276,\n \"stddev_ts\": 0.066562,\n \"samples_ns\": [ 2527795084, 2526172757, 2526677891 ],\n \"samples_ts\": [ 202.548, 202.678, 202.638 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:24:06Z\",\n \"avg_ns\": 12430998670,\n \"stddev_ns\": 2234043,\n \"avg_ts\": 41.187359,\n \"stddev_ts\": 0.007383,\n \"samples_ns\": [ 12429634594, 12429791117, 12433570301 ],\n \"samples_ts\": [ 41.1919, 41.1914, 41.1788 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:23:56Z", + "avg_ns": 2526881910, + "stddev_ns": 833221, + "avg_ts": 202.621276, + "stddev_ts": 0.066562, + "samples_ns": [ + 2527795084, + 2526172757, + 2526677891 + ], + "samples_ts": [ + 202.548, + 202.678, + 202.638 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T21:24:06Z", + "avg_ns": 12430998670, + "stddev_ns": 2234043, + "avg_ts": 41.187359, + "stddev_ts": 0.007383, + "samples_ns": [ + 12429634594, + 12429791117, + 12433570301 + ], + "samples_ts": [ + 41.1919, + 41.1914, + 41.1788 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 91 + }, + { + "timestamp_utc": "2025-12-08T21:24:56.465620+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:24:44Z\",\n \"avg_ns\": 606170438,\n \"stddev_ns\": 4336557,\n \"avg_ts\": 211.168912,\n \"stddev_ts\": 1.505313,\n \"samples_ns\": [ 604472253, 611098948, 602940115 ],\n \"samples_ts\": [ 211.755, 209.459, 212.293 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:24:47Z\",\n \"avg_ns\": 3070360324,\n \"stddev_ns\": 2696786,\n \"avg_ts\": 41.688939,\n \"stddev_ts\": 0.036609,\n \"samples_ns\": [ 3067461731, 3072792033, 3070827210 ],\n \"samples_ts\": [ 41.7283, 41.6559, 41.6826 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:24:44Z", + "avg_ns": 606170438, + "stddev_ns": 4336557, + "avg_ts": 211.168912, + "stddev_ts": 1.505313, + "samples_ns": [ + 604472253, + 611098948, + 602940115 + ], + "samples_ts": [ + 211.755, + 209.459, + 212.293 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T21:24:47Z", + "avg_ns": 3070360324, + "stddev_ns": 2696786, + "avg_ts": 41.688939, + "stddev_ts": 0.036609, + "samples_ns": [ + 3067461731, + 3072792033, + 3070827210 + ], + "samples_ts": [ + 41.7283, + 41.6559, + 41.6826 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 92 + }, + { + "timestamp_utc": "2025-12-08T21:25:37.003549+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:24:57Z\",\n \"avg_ns\": 607358633,\n \"stddev_ns\": 667669,\n \"avg_ts\": 210.748800,\n \"stddev_ts\": 0.231550,\n \"samples_ns\": [ 608116872, 607100291, 606858736 ],\n \"samples_ts\": [ 210.486, 210.838, 210.922 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:24:59Z\",\n \"avg_ns\": 12435312691,\n \"stddev_ns\": 9498085,\n \"avg_ts\": 41.173086,\n \"stddev_ts\": 0.031439,\n \"samples_ns\": [ 12432640777, 12427436738, 12445860558 ],\n \"samples_ts\": [ 41.1819, 41.1992, 41.1382 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:24:57Z", + "avg_ns": 607358633, + "stddev_ns": 667669, + "avg_ts": 210.7488, + "stddev_ts": 0.23155, + "samples_ns": [ + 608116872, + 607100291, + 606858736 + ], + "samples_ts": [ + 210.486, + 210.838, + 210.922 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T21:24:59Z", + "avg_ns": 12435312691, + "stddev_ns": 9498085, + "avg_ts": 41.173086, + "stddev_ts": 0.031439, + "samples_ns": [ + 12432640777, + 12427436738, + 12445860558 + ], + "samples_ts": [ + 41.1819, + 41.1992, + 41.1382 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 93 + }, + { + "timestamp_utc": "2025-12-08T21:25:57.448288+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:25:37Z\",\n \"avg_ns\": 2624255371,\n \"stddev_ns\": 820531,\n \"avg_ts\": 195.102976,\n \"stddev_ts\": 0.061004,\n \"samples_ns\": [ 2625069287, 2624268445, 2623428381 ],\n \"samples_ts\": [ 195.042, 195.102, 195.164 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:25:48Z\",\n \"avg_ns\": 3055043471,\n \"stddev_ns\": 515058,\n \"avg_ts\": 41.897932,\n \"stddev_ts\": 0.006982,\n \"samples_ns\": [ 3054943882, 3055595000, 3054591533 ],\n \"samples_ts\": [ 41.8993, 41.8904, 41.9041 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:25:37Z", + "avg_ns": 2624255371, + "stddev_ns": 820531, + "avg_ts": 195.102976, + "stddev_ts": 0.061004, + "samples_ns": [ + 2625069287, + 2624268445, + 2623428381 + ], + "samples_ts": [ + 195.042, + 195.102, + 195.164 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T21:25:48Z", + "avg_ns": 3055043471, + "stddev_ns": 515058, + "avg_ts": 41.897932, + "stddev_ts": 0.006982, + "samples_ns": [ + 3054943882, + 3055595000, + 3054591533 + ], + "samples_ts": [ + 41.8993, + 41.8904, + 41.9041 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 94 + }, + { + "timestamp_utc": "2025-12-08T21:26:46.194631+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:25:58Z\",\n \"avg_ns\": 2665480577,\n \"stddev_ns\": 1202149,\n \"avg_ts\": 192.085462,\n \"stddev_ts\": 0.086609,\n \"samples_ns\": [ 2664824526, 2666868018, 2664749187 ],\n \"samples_ts\": [ 192.133, 191.986, 192.138 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:26:08Z\",\n \"avg_ns\": 12431813802,\n \"stddev_ns\": 2483945,\n \"avg_ts\": 41.184659,\n \"stddev_ts\": 0.008228,\n \"samples_ns\": [ 12434680420, 12430463338, 12430297648 ],\n \"samples_ts\": [ 41.1752, 41.1891, 41.1897 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:25:58Z", + "avg_ns": 2665480577, + "stddev_ns": 1202149, + "avg_ts": 192.085462, + "stddev_ts": 0.086609, + "samples_ns": [ + 2664824526, + 2666868018, + 2664749187 + ], + "samples_ts": [ + 192.133, + 191.986, + 192.138 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T21:26:08Z", + "avg_ns": 12431813802, + "stddev_ns": 2483945, + "avg_ts": 41.184659, + "stddev_ts": 0.008228, + "samples_ns": [ + 12434680420, + 12430463338, + 12430297648 + ], + "samples_ts": [ + 41.1752, + 41.1891, + 41.1897 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 95 + }, + { + "timestamp_utc": "2025-12-08T21:26:58.586514+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:26:46Z\",\n \"avg_ns\": 604047732,\n \"stddev_ns\": 760262,\n \"avg_ts\": 211.904007,\n \"stddev_ts\": 0.266416,\n \"samples_ns\": [ 603448289, 603792570, 604902338 ],\n \"samples_ts\": [ 212.114, 211.993, 211.604 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:26:49Z\",\n \"avg_ns\": 3064181987,\n \"stddev_ns\": 1595179,\n \"avg_ts\": 41.772983,\n \"stddev_ts\": 0.021733,\n \"samples_ns\": [ 3064164968, 3062596346, 3065784648 ],\n \"samples_ts\": [ 41.7732, 41.7946, 41.7511 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:26:46Z", + "avg_ns": 604047732, + "stddev_ns": 760262, + "avg_ts": 211.904007, + "stddev_ts": 0.266416, + "samples_ns": [ + 603448289, + 603792570, + 604902338 + ], + "samples_ts": [ + 212.114, + 211.993, + 211.604 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T21:26:49Z", + "avg_ns": 3064181987, + "stddev_ns": 1595179, + "avg_ts": 41.772983, + "stddev_ts": 0.021733, + "samples_ns": [ + 3064164968, + 3062596346, + 3065784648 + ], + "samples_ts": [ + 41.7732, + 41.7946, + 41.7511 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 96 + }, + { + "timestamp_utc": "2025-12-08T21:27:39.100458+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:26:59Z\",\n \"avg_ns\": 605413819,\n \"stddev_ns\": 284857,\n \"avg_ts\": 211.425664,\n \"stddev_ts\": 0.098744,\n \"samples_ns\": [ 605673432, 605112595, 605455432 ],\n \"samples_ts\": [ 211.335, 211.531, 211.411 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:27:01Z\",\n \"avg_ns\": 12435093870,\n \"stddev_ns\": 2769534,\n \"avg_ts\": 41.173796,\n \"stddev_ts\": 0.009162,\n \"samples_ns\": [ 12433690145, 12438281704, 12433309762 ],\n \"samples_ts\": [ 41.1784, 41.1632, 41.1797 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:26:59Z", + "avg_ns": 605413819, + "stddev_ns": 284857, + "avg_ts": 211.425664, + "stddev_ts": 0.098744, + "samples_ns": [ + 605673432, + 605112595, + 605455432 + ], + "samples_ts": [ + 211.335, + 211.531, + 211.411 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T21:27:01Z", + "avg_ns": 12435093870, + "stddev_ns": 2769534, + "avg_ts": 41.173796, + "stddev_ts": 0.009162, + "samples_ns": [ + 12433690145, + 12438281704, + 12433309762 + ], + "samples_ts": [ + 41.1784, + 41.1632, + 41.1797 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 97 + }, + { + "timestamp_utc": "2025-12-08T21:27:58.945879+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:27:39Z\",\n \"avg_ns\": 2462925863,\n \"stddev_ns\": 1031724,\n \"avg_ts\": 207.882855,\n \"stddev_ts\": 0.087065,\n \"samples_ns\": [ 2462552645, 2464092261, 2462132683 ],\n \"samples_ts\": [ 207.914, 207.784, 207.95 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:27:49Z\",\n \"avg_ns\": 3054561311,\n \"stddev_ns\": 2390522,\n \"avg_ts\": 41.904561,\n \"stddev_ts\": 0.032777,\n \"samples_ns\": [ 3057201425, 3052545630, 3053936879 ],\n \"samples_ts\": [ 41.8684, 41.9322, 41.9131 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:27:39Z", + "avg_ns": 2462925863, + "stddev_ns": 1031724, + "avg_ts": 207.882855, + "stddev_ts": 0.087065, + "samples_ns": [ + 2462552645, + 2464092261, + 2462132683 + ], + "samples_ts": [ + 207.914, + 207.784, + 207.95 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T21:27:49Z", + "avg_ns": 3054561311, + "stddev_ns": 2390522, + "avg_ts": 41.904561, + "stddev_ts": 0.032777, + "samples_ns": [ + 3057201425, + 3052545630, + 3053936879 + ], + "samples_ts": [ + 41.8684, + 41.9322, + 41.9131 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 98 + }, + { + "timestamp_utc": "2025-12-08T21:28:47.057852+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:27:59Z\",\n \"avg_ns\": 2464081121,\n \"stddev_ns\": 741571,\n \"avg_ts\": 207.785380,\n \"stddev_ts\": 0.062523,\n \"samples_ns\": [ 2464933752, 2463723310, 2463586301 ],\n \"samples_ts\": [ 207.713, 207.816, 207.827 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:28:09Z\",\n \"avg_ns\": 12489619715,\n \"stddev_ns\": 9327394,\n \"avg_ts\": 40.994058,\n \"stddev_ts\": 0.030602,\n \"samples_ns\": [ 12481812515, 12487099841, 12499946791 ],\n \"samples_ts\": [ 41.0197, 41.0023, 40.9602 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:27:59Z", + "avg_ns": 2464081121, + "stddev_ns": 741571, + "avg_ts": 207.78538, + "stddev_ts": 0.062523, + "samples_ns": [ + 2464933752, + 2463723310, + 2463586301 + ], + "samples_ts": [ + 207.713, + 207.816, + 207.827 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T21:28:09Z", + "avg_ns": 12489619715, + "stddev_ns": 9327394, + "avg_ts": 40.994058, + "stddev_ts": 0.030602, + "samples_ns": [ + 12481812515, + 12487099841, + 12499946791 + ], + "samples_ts": [ + 41.0197, + 41.0023, + 40.9602 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 99 + }, + { + "timestamp_utc": "2025-12-08T21:28:59.467055+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:28:47Z\",\n \"avg_ns\": 605159035,\n \"stddev_ns\": 225689,\n \"avg_ts\": 211.514667,\n \"stddev_ts\": 0.078408,\n \"samples_ns\": [ 605131213, 604949899, 605395994 ],\n \"samples_ts\": [ 211.524, 211.588, 211.432 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:28:50Z\",\n \"avg_ns\": 3058915154,\n \"stddev_ns\": 2221862,\n \"avg_ts\": 41.844915,\n \"stddev_ts\": 0.030368,\n \"samples_ns\": [ 3056995502, 3058402991, 3061346971 ],\n \"samples_ts\": [ 41.8712, 41.8519, 41.8117 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:28:47Z", + "avg_ns": 605159035, + "stddev_ns": 225689, + "avg_ts": 211.514667, + "stddev_ts": 0.078408, + "samples_ns": [ + 605131213, + 604949899, + 605395994 + ], + "samples_ts": [ + 211.524, + 211.588, + 211.432 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T21:28:50Z", + "avg_ns": 3058915154, + "stddev_ns": 2221862, + "avg_ts": 41.844915, + "stddev_ts": 0.030368, + "samples_ns": [ + 3056995502, + 3058402991, + 3061346971 + ], + "samples_ts": [ + 41.8712, + 41.8519, + 41.8117 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 100 + }, + { + "timestamp_utc": "2025-12-08T21:29:39.995310+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:29:00Z\",\n \"avg_ns\": 603818013,\n \"stddev_ns\": 348133,\n \"avg_ts\": 211.984448,\n \"stddev_ts\": 0.122198,\n \"samples_ns\": [ 603514492, 603741512, 604198035 ],\n \"samples_ts\": [ 212.091, 212.011, 211.851 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:29:02Z\",\n \"avg_ns\": 12442476918,\n \"stddev_ns\": 7776455,\n \"avg_ts\": 41.149374,\n \"stddev_ts\": 0.025725,\n \"samples_ns\": [ 12433778923, 12444894099, 12448757732 ],\n \"samples_ts\": [ 41.1781, 41.1414, 41.1286 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:29:00Z", + "avg_ns": 603818013, + "stddev_ns": 348133, + "avg_ts": 211.984448, + "stddev_ts": 0.122198, + "samples_ns": [ + 603514492, + 603741512, + 604198035 + ], + "samples_ts": [ + 212.091, + 212.011, + 211.851 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T21:29:02Z", + "avg_ns": 12442476918, + "stddev_ns": 7776455, + "avg_ts": 41.149374, + "stddev_ts": 0.025725, + "samples_ns": [ + 12433778923, + 12444894099, + 12448757732 + ], + "samples_ts": [ + 41.1781, + 41.1414, + 41.1286 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 101 + }, + { + "timestamp_utc": "2025-12-08T21:30:00.059677+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:29:40Z\",\n \"avg_ns\": 2526000887,\n \"stddev_ns\": 1185865,\n \"avg_ts\": 202.691962,\n \"stddev_ts\": 0.094997,\n \"samples_ns\": [ 2527070084, 2524728835, 2526203744 ],\n \"samples_ts\": [ 202.606, 202.794, 202.676 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:29:50Z\",\n \"avg_ns\": 3057721373,\n \"stddev_ns\": 1241809,\n \"avg_ts\": 41.861242,\n \"stddev_ts\": 0.017004,\n \"samples_ns\": [ 3058698428, 3058141753, 3056323938 ],\n \"samples_ts\": [ 41.8479, 41.8555, 41.8804 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:29:40Z", + "avg_ns": 2526000887, + "stddev_ns": 1185865, + "avg_ts": 202.691962, + "stddev_ts": 0.094997, + "samples_ns": [ + 2527070084, + 2524728835, + 2526203744 + ], + "samples_ts": [ + 202.606, + 202.794, + 202.676 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T21:29:50Z", + "avg_ns": 3057721373, + "stddev_ns": 1241809, + "avg_ts": 41.861242, + "stddev_ts": 0.017004, + "samples_ns": [ + 3058698428, + 3058141753, + 3056323938 + ], + "samples_ts": [ + 41.8479, + 41.8555, + 41.8804 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 102 + }, + { + "timestamp_utc": "2025-12-08T21:30:48.277456+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:30:00Z\",\n \"avg_ns\": 2524466405,\n \"stddev_ns\": 2291597,\n \"avg_ts\": 202.815248,\n \"stddev_ts\": 0.184111,\n \"samples_ns\": [ 2526000223, 2525565488, 2521833506 ],\n \"samples_ts\": [ 202.692, 202.727, 203.027 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:30:10Z\",\n \"avg_ns\": 12442378848,\n \"stddev_ns\": 4613914,\n \"avg_ts\": 41.149691,\n \"stddev_ts\": 0.015261,\n \"samples_ns\": [ 12446530325, 12437411374, 12443194845 ],\n \"samples_ts\": [ 41.136, 41.1661, 41.147 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:30:00Z", + "avg_ns": 2524466405, + "stddev_ns": 2291597, + "avg_ts": 202.815248, + "stddev_ts": 0.184111, + "samples_ns": [ + 2526000223, + 2525565488, + 2521833506 + ], + "samples_ts": [ + 202.692, + 202.727, + 203.027 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T21:30:10Z", + "avg_ns": 12442378848, + "stddev_ns": 4613914, + "avg_ts": 41.149691, + "stddev_ts": 0.015261, + "samples_ns": [ + 12446530325, + 12437411374, + 12443194845 + ], + "samples_ts": [ + 41.136, + 41.1661, + 41.147 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 103 + }, + { + "timestamp_utc": "2025-12-08T21:31:00.629359+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:30:48Z\",\n \"avg_ns\": 604293907,\n \"stddev_ns\": 369762,\n \"avg_ts\": 211.817512,\n \"stddev_ts\": 0.129592,\n \"samples_ns\": [ 604688817, 603955886, 604237018 ],\n \"samples_ts\": [ 211.679, 211.936, 211.837 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:30:51Z\",\n \"avg_ns\": 3048637801,\n \"stddev_ns\": 701829,\n \"avg_ts\": 41.985966,\n \"stddev_ts\": 0.009636,\n \"samples_ns\": [ 3049303035, 3048702191, 3047908178 ],\n \"samples_ts\": [ 41.9768, 41.9851, 41.996 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:30:48Z", + "avg_ns": 604293907, + "stddev_ns": 369762, + "avg_ts": 211.817512, + "stddev_ts": 0.129592, + "samples_ns": [ + 604688817, + 603955886, + 604237018 + ], + "samples_ts": [ + 211.679, + 211.936, + 211.837 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T21:30:51Z", + "avg_ns": 3048637801, + "stddev_ns": 701829, + "avg_ts": 41.985966, + "stddev_ts": 0.009636, + "samples_ns": [ + 3049303035, + 3048702191, + 3047908178 + ], + "samples_ts": [ + 41.9768, + 41.9851, + 41.996 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 104 + }, + { + "timestamp_utc": "2025-12-08T21:31:41.212226+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:31:01Z\",\n \"avg_ns\": 603444571,\n \"stddev_ns\": 173794,\n \"avg_ts\": 212.115599,\n \"stddev_ts\": 0.060467,\n \"samples_ns\": [ 603338484, 603352150, 603643080 ],\n \"samples_ts\": [ 212.153, 212.148, 212.046 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:31:03Z\",\n \"avg_ns\": 12461893815,\n \"stddev_ns\": 1145782,\n \"avg_ts\": 41.085248,\n \"stddev_ts\": 0.003777,\n \"samples_ns\": [ 12461288502, 12461177641, 12463215302 ],\n \"samples_ts\": [ 41.0872, 41.0876, 41.0809 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:31:01Z", + "avg_ns": 603444571, + "stddev_ns": 173794, + "avg_ts": 212.115599, + "stddev_ts": 0.060467, + "samples_ns": [ + 603338484, + 603352150, + 603643080 + ], + "samples_ts": [ + 212.153, + 212.148, + 212.046 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T21:31:03Z", + "avg_ns": 12461893815, + "stddev_ns": 1145782, + "avg_ts": 41.085248, + "stddev_ts": 0.003777, + "samples_ns": [ + 12461288502, + 12461177641, + 12463215302 + ], + "samples_ts": [ + 41.0872, + 41.0876, + 41.0809 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 105 + }, + { + "timestamp_utc": "2025-12-08T21:32:01.611334+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:31:41Z\",\n \"avg_ns\": 2614930019,\n \"stddev_ns\": 1737526,\n \"avg_ts\": 195.798796,\n \"stddev_ts\": 0.130001,\n \"samples_ns\": [ 2616910142, 2614215128, 2613664788 ],\n \"samples_ts\": [ 195.651, 195.852, 195.894 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:31:52Z\",\n \"avg_ns\": 3050724254,\n \"stddev_ns\": 562394,\n \"avg_ts\": 41.957251,\n \"stddev_ts\": 0.007735,\n \"samples_ns\": [ 3051260144, 3050138655, 3050773963 ],\n \"samples_ts\": [ 41.9499, 41.9653, 41.9566 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:31:41Z", + "avg_ns": 2614930019, + "stddev_ns": 1737526, + "avg_ts": 195.798796, + "stddev_ts": 0.130001, + "samples_ns": [ + 2616910142, + 2614215128, + 2613664788 + ], + "samples_ts": [ + 195.651, + 195.852, + 195.894 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T21:31:52Z", + "avg_ns": 3050724254, + "stddev_ns": 562394, + "avg_ts": 41.957251, + "stddev_ts": 0.007735, + "samples_ns": [ + 3051260144, + 3050138655, + 3050773963 + ], + "samples_ts": [ + 41.9499, + 41.9653, + 41.9566 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 106 + }, + { + "timestamp_utc": "2025-12-08T21:32:50.145952+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:32:02Z\",\n \"avg_ns\": 2613378681,\n \"stddev_ns\": 355463,\n \"avg_ts\": 195.914970,\n \"stddev_ts\": 0.026089,\n \"samples_ns\": [ 2613776703, 2613131587, 2613227755 ],\n \"samples_ts\": [ 195.885, 195.933, 195.926 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:32:12Z\",\n \"avg_ns\": 12430610782,\n \"stddev_ns\": 8682935,\n \"avg_ts\": 41.188657,\n \"stddev_ts\": 0.028769,\n \"samples_ns\": [ 12438825010, 12421527389, 12431479949 ],\n \"samples_ts\": [ 41.1614, 41.2188, 41.1858 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:32:02Z", + "avg_ns": 2613378681, + "stddev_ns": 355463, + "avg_ts": 195.91497, + "stddev_ts": 0.026089, + "samples_ns": [ + 2613776703, + 2613131587, + 2613227755 + ], + "samples_ts": [ + 195.885, + 195.933, + 195.926 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T21:32:12Z", + "avg_ns": 12430610782, + "stddev_ns": 8682935, + "avg_ts": 41.188657, + "stddev_ts": 0.028769, + "samples_ns": [ + 12438825010, + 12421527389, + 12431479949 + ], + "samples_ts": [ + 41.1614, + 41.2188, + 41.1858 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 107 + }, + { + "timestamp_utc": "2025-12-08T21:33:02.819122+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:32:50Z\",\n \"avg_ns\": 472272391,\n \"stddev_ns\": 93295,\n \"avg_ts\": 271.030036,\n \"stddev_ts\": 0.052073,\n \"samples_ns\": [ 472342751, 472304435, 472169988 ],\n \"samples_ts\": [ 270.99, 271.012, 271.089 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:32:52Z\",\n \"avg_ns\": 3330254528,\n \"stddev_ns\": 13958623,\n \"avg_ts\": 38.435951,\n \"stddev_ts\": 0.161201,\n \"samples_ns\": [ 3315659523, 3331629260, 3343474802 ],\n \"samples_ts\": [ 38.6047, 38.4196, 38.2835 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:32:50Z", + "avg_ns": 472272391, + "stddev_ns": 93295, + "avg_ts": 271.030036, + "stddev_ts": 0.052073, + "samples_ns": [ + 472342751, + 472304435, + 472169988 + ], + "samples_ts": [ + 270.99, + 271.012, + 271.089 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T21:32:52Z", + "avg_ns": 3330254528, + "stddev_ns": 13958623, + "avg_ts": 38.435951, + "stddev_ts": 0.161201, + "samples_ns": [ + 3315659523, + 3331629260, + 3343474802 + ], + "samples_ts": [ + 38.6047, + 38.4196, + 38.2835 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 108 + }, + { + "timestamp_utc": "2025-12-08T21:33:46.065778+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:33:03Z\",\n \"avg_ns\": 473779682,\n \"stddev_ns\": 535501,\n \"avg_ts\": 270.167999,\n \"stddev_ts\": 0.305265,\n \"samples_ns\": [ 473958944, 473178010, 474202093 ],\n \"samples_ts\": [ 270.066, 270.511, 269.927 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:33:05Z\",\n \"avg_ns\": 13521776771,\n \"stddev_ns\": 38263472,\n \"avg_ts\": 37.865049,\n \"stddev_ts\": 0.107282,\n \"samples_ns\": [ 13478800051, 13534387215, 13552143049 ],\n \"samples_ts\": [ 37.9856, 37.8296, 37.78 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:33:03Z", + "avg_ns": 473779682, + "stddev_ns": 535501, + "avg_ts": 270.167999, + "stddev_ts": 0.305265, + "samples_ns": [ + 473958944, + 473178010, + 474202093 + ], + "samples_ts": [ + 270.066, + 270.511, + 269.927 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T21:33:05Z", + "avg_ns": 13521776771, + "stddev_ns": 38263472, + "avg_ts": 37.865049, + "stddev_ts": 0.107282, + "samples_ns": [ + 13478800051, + 13534387215, + 13552143049 + ], + "samples_ts": [ + 37.9856, + 37.8296, + 37.78 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 109 + }, + { + "timestamp_utc": "2025-12-08T21:34:04.617511+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:33:46Z\",\n \"avg_ns\": 1923342776,\n \"stddev_ns\": 3936382,\n \"avg_ts\": 266.203941,\n \"stddev_ts\": 0.544116,\n \"samples_ns\": [ 1927884368, 1920924589, 1921219373 ],\n \"samples_ts\": [ 265.576, 266.538, 266.497 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:33:54Z\",\n \"avg_ns\": 3341401414,\n \"stddev_ns\": 63237844,\n \"avg_ts\": 38.316393,\n \"stddev_ts\": 0.722307,\n \"samples_ns\": [ 3283354510, 3332058961, 3408790771 ],\n \"samples_ts\": [ 38.9845, 38.4147, 37.55 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:33:46Z", + "avg_ns": 1923342776, + "stddev_ns": 3936382, + "avg_ts": 266.203941, + "stddev_ts": 0.544116, + "samples_ns": [ + 1927884368, + 1920924589, + 1921219373 + ], + "samples_ts": [ + 265.576, + 266.538, + 266.497 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T21:33:54Z", + "avg_ns": 3341401414, + "stddev_ns": 63237844, + "avg_ts": 38.316393, + "stddev_ts": 0.722307, + "samples_ns": [ + 3283354510, + 3332058961, + 3408790771 + ], + "samples_ts": [ + 38.9845, + 38.4147, + 37.55 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 110 + }, + { + "timestamp_utc": "2025-12-08T21:34:55.076208+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:34:05Z\",\n \"avg_ns\": 1919202906,\n \"stddev_ns\": 4359550,\n \"avg_ts\": 266.778336,\n \"stddev_ts\": 0.605145,\n \"samples_ns\": [ 1924235193, 1916781152, 1916592375 ],\n \"samples_ts\": [ 266.08, 267.114, 267.141 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:34:13Z\",\n \"avg_ns\": 13984311366,\n \"stddev_ns\": 108551428,\n \"avg_ts\": 36.613933,\n \"stddev_ts\": 0.285200,\n \"samples_ns\": [ 13862265302, 14020596140, 14070072656 ],\n \"samples_ts\": [ 36.9348, 36.5177, 36.3893 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:34:05Z", + "avg_ns": 1919202906, + "stddev_ns": 4359550, + "avg_ts": 266.778336, + "stddev_ts": 0.605145, + "samples_ns": [ + 1924235193, + 1916781152, + 1916592375 + ], + "samples_ts": [ + 266.08, + 267.114, + 267.141 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T21:34:13Z", + "avg_ns": 13984311366, + "stddev_ns": 108551428, + "avg_ts": 36.613933, + "stddev_ts": 0.2852, + "samples_ns": [ + 13862265302, + 14020596140, + 14070072656 + ], + "samples_ts": [ + 36.9348, + 36.5177, + 36.3893 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 111 + }, + { + "timestamp_utc": "2025-12-08T21:35:08.031014+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:34:55Z\",\n \"avg_ns\": 476679239,\n \"stddev_ns\": 3711477,\n \"avg_ts\": 268.535210,\n \"stddev_ts\": 2.085377,\n \"samples_ns\": [ 475794978, 480752919, 473489821 ],\n \"samples_ts\": [ 269.023, 266.249, 270.333 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:34:57Z\",\n \"avg_ns\": 3418477277,\n \"stddev_ns\": 62137279,\n \"avg_ts\": 37.451908,\n \"stddev_ts\": 0.687945,\n \"samples_ns\": [ 3346765091, 3456347405, 3452319337 ],\n \"samples_ts\": [ 38.2459, 37.0333, 37.0765 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:34:55Z", + "avg_ns": 476679239, + "stddev_ns": 3711477, + "avg_ts": 268.53521, + "stddev_ts": 2.085377, + "samples_ns": [ + 475794978, + 480752919, + 473489821 + ], + "samples_ts": [ + 269.023, + 266.249, + 270.333 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T21:34:57Z", + "avg_ns": 3418477277, + "stddev_ns": 62137279, + "avg_ts": 37.451908, + "stddev_ts": 0.687945, + "samples_ns": [ + 3346765091, + 3456347405, + 3452319337 + ], + "samples_ts": [ + 38.2459, + 37.0333, + 37.0765 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 112 + }, + { + "timestamp_utc": "2025-12-08T21:35:52.997932+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:35:08Z\",\n \"avg_ns\": 473487570,\n \"stddev_ns\": 1264074,\n \"avg_ts\": 270.335728,\n \"stddev_ts\": 0.720534,\n \"samples_ns\": [ 472555753, 474925963, 472980996 ],\n \"samples_ts\": [ 270.868, 269.516, 270.624 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:35:10Z\",\n \"avg_ns\": 14085111092,\n \"stddev_ns\": 71860879,\n \"avg_ts\": 36.351072,\n \"stddev_ts\": 0.185572,\n \"samples_ns\": [ 14010612518, 14090715007, 14154005753 ],\n \"samples_ts\": [ 36.5437, 36.336, 36.1735 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:35:08Z", + "avg_ns": 473487570, + "stddev_ns": 1264074, + "avg_ts": 270.335728, + "stddev_ts": 0.720534, + "samples_ns": [ + 472555753, + 474925963, + 472980996 + ], + "samples_ts": [ + 270.868, + 269.516, + 270.624 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T21:35:10Z", + "avg_ns": 14085111092, + "stddev_ns": 71860879, + "avg_ts": 36.351072, + "stddev_ts": 0.185572, + "samples_ns": [ + 14010612518, + 14090715007, + 14154005753 + ], + "samples_ts": [ + 36.5437, + 36.336, + 36.1735 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 113 + }, + { + "timestamp_utc": "2025-12-08T21:36:11.862189+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:35:53Z\",\n \"avg_ns\": 1963513698,\n \"stddev_ns\": 7362392,\n \"avg_ts\": 260.759469,\n \"stddev_ts\": 0.975771,\n \"samples_ns\": [ 1971944368, 1960244787, 1958351940 ],\n \"samples_ts\": [ 259.642, 261.192, 261.444 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:36:01Z\",\n \"avg_ns\": 3402328550,\n \"stddev_ns\": 56626339,\n \"avg_ts\": 37.628309,\n \"stddev_ts\": 0.632273,\n \"samples_ns\": [ 3337023040, 3432164369, 3437798241 ],\n \"samples_ts\": [ 38.3575, 37.2943, 37.2331 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:35:53Z", + "avg_ns": 1963513698, + "stddev_ns": 7362392, + "avg_ts": 260.759469, + "stddev_ts": 0.975771, + "samples_ns": [ + 1971944368, + 1960244787, + 1958351940 + ], + "samples_ts": [ + 259.642, + 261.192, + 261.444 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T21:36:01Z", + "avg_ns": 3402328550, + "stddev_ns": 56626339, + "avg_ts": 37.628309, + "stddev_ts": 0.632273, + "samples_ns": [ + 3337023040, + 3432164369, + 3437798241 + ], + "samples_ts": [ + 38.3575, + 37.2943, + 37.2331 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 114 + }, + { + "timestamp_utc": "2025-12-08T21:37:02.388944+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:36:12Z\",\n \"avg_ns\": 1964216292,\n \"stddev_ns\": 2538490,\n \"avg_ts\": 260.664048,\n \"stddev_ts\": 0.337023,\n \"samples_ns\": [ 1965672234, 1961286009, 1965690635 ],\n \"samples_ts\": [ 260.471, 261.053, 260.468 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:36:20Z\",\n \"avg_ns\": 13947193409,\n \"stddev_ns\": 49471544,\n \"avg_ts\": 36.710203,\n \"stddev_ts\": 0.130441,\n \"samples_ns\": [ 13891036218, 13966205739, 13984338271 ],\n \"samples_ts\": [ 36.8583, 36.6599, 36.6124 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:36:12Z", + "avg_ns": 1964216292, + "stddev_ns": 2538490, + "avg_ts": 260.664048, + "stddev_ts": 0.337023, + "samples_ns": [ + 1965672234, + 1961286009, + 1965690635 + ], + "samples_ts": [ + 260.471, + 261.053, + 260.468 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T21:36:20Z", + "avg_ns": 13947193409, + "stddev_ns": 49471544, + "avg_ts": 36.710203, + "stddev_ts": 0.130441, + "samples_ns": [ + 13891036218, + 13966205739, + 13984338271 + ], + "samples_ts": [ + 36.8583, + 36.6599, + 36.6124 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 115 + }, + { + "timestamp_utc": "2025-12-08T21:37:15.363336+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:37:03Z\",\n \"avg_ns\": 473565773,\n \"stddev_ns\": 431312,\n \"avg_ts\": 270.289952,\n \"stddev_ts\": 0.245508,\n \"samples_ns\": [ 473162412, 474018568, 473516341 ],\n \"samples_ts\": [ 270.52, 270.032, 270.318 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:37:04Z\",\n \"avg_ns\": 3427880444,\n \"stddev_ns\": 28062166,\n \"avg_ts\": 37.342535,\n \"stddev_ts\": 0.307154,\n \"samples_ns\": [ 3395477213, 3444013401, 3444150719 ],\n \"samples_ts\": [ 37.6972, 37.1659, 37.1645 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:37:03Z", + "avg_ns": 473565773, + "stddev_ns": 431312, + "avg_ts": 270.289952, + "stddev_ts": 0.245508, + "samples_ns": [ + 473162412, + 474018568, + 473516341 + ], + "samples_ts": [ + 270.52, + 270.032, + 270.318 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T21:37:04Z", + "avg_ns": 3427880444, + "stddev_ns": 28062166, + "avg_ts": 37.342535, + "stddev_ts": 0.307154, + "samples_ns": [ + 3395477213, + 3444013401, + 3444150719 + ], + "samples_ts": [ + 37.6972, + 37.1659, + 37.1645 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 116 + }, + { + "timestamp_utc": "2025-12-08T21:38:00.139636+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:37:16Z\",\n \"avg_ns\": 474004753,\n \"stddev_ns\": 4203016,\n \"avg_ts\": 270.053576,\n \"stddev_ts\": 2.383704,\n \"samples_ns\": [ 478795989, 472278050, 470940222 ],\n \"samples_ts\": [ 267.337, 271.027, 271.797 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:37:17Z\",\n \"avg_ns\": 14017944250,\n \"stddev_ns\": 48998977,\n \"avg_ts\": 36.524912,\n \"stddev_ts\": 0.127875,\n \"samples_ns\": [ 13962727291, 14056239230, 14034866230 ],\n \"samples_ts\": [ 36.6691, 36.4251, 36.4806 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:37:16Z", + "avg_ns": 474004753, + "stddev_ns": 4203016, + "avg_ts": 270.053576, + "stddev_ts": 2.383704, + "samples_ns": [ + 478795989, + 472278050, + 470940222 + ], + "samples_ts": [ + 267.337, + 271.027, + 271.797 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T21:37:17Z", + "avg_ns": 14017944250, + "stddev_ns": 48998977, + "avg_ts": 36.524912, + "stddev_ts": 0.127875, + "samples_ns": [ + 13962727291, + 14056239230, + 14034866230 + ], + "samples_ts": [ + 36.6691, + 36.4251, + 36.4806 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 117 + }, + { + "timestamp_utc": "2025-12-08T21:38:19.679707+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:38:00Z\",\n \"avg_ns\": 2125254351,\n \"stddev_ns\": 1195493,\n \"avg_ts\": 240.912391,\n \"stddev_ts\": 0.135353,\n \"samples_ns\": [ 2123900986, 2125704640, 2126157429 ],\n \"samples_ts\": [ 241.066, 240.861, 240.81 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:38:09Z\",\n \"avg_ns\": 3412167823,\n \"stddev_ns\": 50231097,\n \"avg_ts\": 37.518276,\n \"stddev_ts\": 0.557044,\n \"samples_ns\": [ 3354170021, 3440574308, 3441759141 ],\n \"samples_ts\": [ 38.1615, 37.2031, 37.1903 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:38:00Z", + "avg_ns": 2125254351, + "stddev_ns": 1195493, + "avg_ts": 240.912391, + "stddev_ts": 0.135353, + "samples_ns": [ + 2123900986, + 2125704640, + 2126157429 + ], + "samples_ts": [ + 241.066, + 240.861, + 240.81 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T21:38:09Z", + "avg_ns": 3412167823, + "stddev_ns": 50231097, + "avg_ts": 37.518276, + "stddev_ts": 0.557044, + "samples_ns": [ + 3354170021, + 3440574308, + 3441759141 + ], + "samples_ts": [ + 38.1615, + 37.2031, + 37.1903 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 118 + }, + { + "timestamp_utc": "2025-12-08T21:39:11.143974+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:38:20Z\",\n \"avg_ns\": 2098306806,\n \"stddev_ns\": 2557373,\n \"avg_ts\": 244.006503,\n \"stddev_ts\": 0.297133,\n \"samples_ns\": [ 2101259168, 2096857650, 2096803601 ],\n \"samples_ts\": [ 243.663, 244.175, 244.181 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:38:28Z\",\n \"avg_ns\": 14087170731,\n \"stddev_ns\": 69946572,\n \"avg_ts\": 36.345725,\n \"stddev_ts\": 0.180974,\n \"samples_ns\": [ 14006587665, 14122741456, 14132183073 ],\n \"samples_ts\": [ 36.5542, 36.2536, 36.2294 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:38:20Z", + "avg_ns": 2098306806, + "stddev_ns": 2557373, + "avg_ts": 244.006503, + "stddev_ts": 0.297133, + "samples_ns": [ + 2101259168, + 2096857650, + 2096803601 + ], + "samples_ts": [ + 243.663, + 244.175, + 244.181 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T21:38:28Z", + "avg_ns": 14087170731, + "stddev_ns": 69946572, + "avg_ts": 36.345725, + "stddev_ts": 0.180974, + "samples_ns": [ + 14006587665, + 14122741456, + 14132183073 + ], + "samples_ts": [ + 36.5542, + 36.2536, + 36.2294 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 119 + }, + { + "timestamp_utc": "2025-12-08T21:39:24.104212+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:39:11Z\",\n \"avg_ns\": 473317641,\n \"stddev_ns\": 576167,\n \"avg_ts\": 270.431767,\n \"stddev_ts\": 0.328748,\n \"samples_ns\": [ 472910805, 473976426, 473065693 ],\n \"samples_ts\": [ 270.664, 270.056, 270.576 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:39:13Z\",\n \"avg_ns\": 3423347923,\n \"stddev_ns\": 39596646,\n \"avg_ts\": 37.393656,\n \"stddev_ts\": 0.435385,\n \"samples_ns\": [ 3377698965, 3448412272, 3443932534 ],\n \"samples_ts\": [ 37.8956, 37.1185, 37.1668 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:39:11Z", + "avg_ns": 473317641, + "stddev_ns": 576167, + "avg_ts": 270.431767, + "stddev_ts": 0.328748, + "samples_ns": [ + 472910805, + 473976426, + 473065693 + ], + "samples_ts": [ + 270.664, + 270.056, + 270.576 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T21:39:13Z", + "avg_ns": 3423347923, + "stddev_ns": 39596646, + "avg_ts": 37.393656, + "stddev_ts": 0.435385, + "samples_ns": [ + 3377698965, + 3448412272, + 3443932534 + ], + "samples_ts": [ + 37.8956, + 37.1185, + 37.1668 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 120 + }, + { + "timestamp_utc": "2025-12-08T21:40:08.930678+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:39:24Z\",\n \"avg_ns\": 473442567,\n \"stddev_ns\": 1090638,\n \"avg_ts\": 270.361099,\n \"stddev_ts\": 0.622645,\n \"samples_ns\": [ 472397638, 474573788, 473356275 ],\n \"samples_ts\": [ 270.958, 269.716, 270.409 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:39:26Z\",\n \"avg_ns\": 14036424757,\n \"stddev_ns\": 74333906,\n \"avg_ts\": 36.477209,\n \"stddev_ts\": 0.193705,\n \"samples_ns\": [ 13951616487, 14090282430, 14067375355 ],\n \"samples_ts\": [ 36.6983, 36.3371, 36.3963 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:39:24Z", + "avg_ns": 473442567, + "stddev_ns": 1090638, + "avg_ts": 270.361099, + "stddev_ts": 0.622645, + "samples_ns": [ + 472397638, + 474573788, + 473356275 + ], + "samples_ts": [ + 270.958, + 269.716, + 270.409 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T21:39:26Z", + "avg_ns": 14036424757, + "stddev_ns": 74333906, + "avg_ts": 36.477209, + "stddev_ts": 0.193705, + "samples_ns": [ + 13951616487, + 14090282430, + 14067375355 + ], + "samples_ts": [ + 36.6983, + 36.3371, + 36.3963 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 121 + }, + { + "timestamp_utc": "2025-12-08T21:40:27.657139+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:40:09Z\",\n \"avg_ns\": 1940895187,\n \"stddev_ns\": 3139665,\n \"avg_ts\": 263.796261,\n \"stddev_ts\": 0.426560,\n \"samples_ns\": [ 1944262746, 1938048585, 1940374230 ],\n \"samples_ts\": [ 263.339, 264.183, 263.867 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:40:17Z\",\n \"avg_ns\": 3388308839,\n \"stddev_ns\": 66876154,\n \"avg_ts\": 37.786851,\n \"stddev_ts\": 0.751892,\n \"samples_ns\": [ 3313775001, 3408084423, 3443067094 ],\n \"samples_ts\": [ 38.6266, 37.5578, 37.1762 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:40:09Z", + "avg_ns": 1940895187, + "stddev_ns": 3139665, + "avg_ts": 263.796261, + "stddev_ts": 0.42656, + "samples_ns": [ + 1944262746, + 1938048585, + 1940374230 + ], + "samples_ts": [ + 263.339, + 264.183, + 263.867 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T21:40:17Z", + "avg_ns": 3388308839, + "stddev_ns": 66876154, + "avg_ts": 37.786851, + "stddev_ts": 0.751892, + "samples_ns": [ + 3313775001, + 3408084423, + 3443067094 + ], + "samples_ts": [ + 38.6266, + 37.5578, + 37.1762 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 122 + }, + { + "timestamp_utc": "2025-12-08T21:41:18.124691+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:40:28Z\",\n \"avg_ns\": 1923211706,\n \"stddev_ns\": 2331562,\n \"avg_ts\": 266.221602,\n \"stddev_ts\": 0.322857,\n \"samples_ns\": [ 1924676130, 1920524007, 1924434983 ],\n \"samples_ts\": [ 266.019, 266.594, 266.052 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:40:36Z\",\n \"avg_ns\": 13989453486,\n \"stddev_ns\": 48532239,\n \"avg_ts\": 36.599294,\n \"stddev_ts\": 0.127223,\n \"samples_ns\": [ 13933466484, 14019557895, 14015336080 ],\n \"samples_ts\": [ 36.7461, 36.5204, 36.5314 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:40:28Z", + "avg_ns": 1923211706, + "stddev_ns": 2331562, + "avg_ts": 266.221602, + "stddev_ts": 0.322857, + "samples_ns": [ + 1924676130, + 1920524007, + 1924434983 + ], + "samples_ts": [ + 266.019, + 266.594, + 266.052 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T21:40:36Z", + "avg_ns": 13989453486, + "stddev_ns": 48532239, + "avg_ts": 36.599294, + "stddev_ts": 0.127223, + "samples_ns": [ + 13933466484, + 14019557895, + 14015336080 + ], + "samples_ts": [ + 36.7461, + 36.5204, + 36.5314 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 123 + }, + { + "timestamp_utc": "2025-12-08T21:41:31.147041+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:41:18Z\",\n \"avg_ns\": 473013484,\n \"stddev_ns\": 1061295,\n \"avg_ts\": 270.606303,\n \"stddev_ts\": 0.607942,\n \"samples_ns\": [ 471788337, 473601372, 473650743 ],\n \"samples_ts\": [ 271.308, 270.269, 270.241 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:41:20Z\",\n \"avg_ns\": 3435162638,\n \"stddev_ns\": 48275020,\n \"avg_ts\": 37.266643,\n \"stddev_ts\": 0.527557,\n \"samples_ns\": [ 3380079243, 3470109428, 3455299245 ],\n \"samples_ts\": [ 37.8689, 36.8864, 37.0445 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:41:18Z", + "avg_ns": 473013484, + "stddev_ns": 1061295, + "avg_ts": 270.606303, + "stddev_ts": 0.607942, + "samples_ns": [ + 471788337, + 473601372, + 473650743 + ], + "samples_ts": [ + 271.308, + 270.269, + 270.241 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T21:41:20Z", + "avg_ns": 3435162638, + "stddev_ns": 48275020, + "avg_ts": 37.266643, + "stddev_ts": 0.527557, + "samples_ns": [ + 3380079243, + 3470109428, + 3455299245 + ], + "samples_ts": [ + 37.8689, + 36.8864, + 37.0445 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 124 + }, + { + "timestamp_utc": "2025-12-08T21:42:15.968904+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:41:31Z\",\n \"avg_ns\": 472972843,\n \"stddev_ns\": 835572,\n \"avg_ts\": 270.629209,\n \"stddev_ts\": 0.478390,\n \"samples_ns\": [ 472017143, 473338063, 473563324 ],\n \"samples_ts\": [ 271.177, 270.42, 270.291 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:41:33Z\",\n \"avg_ns\": 14042868853,\n \"stddev_ns\": 52734976,\n \"avg_ts\": 36.460130,\n \"stddev_ts\": 0.137176,\n \"samples_ns\": [ 13982878244, 14063820116, 14081908201 ],\n \"samples_ts\": [ 36.6162, 36.4055, 36.3587 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:41:31Z", + "avg_ns": 472972843, + "stddev_ns": 835572, + "avg_ts": 270.629209, + "stddev_ts": 0.47839, + "samples_ns": [ + 472017143, + 473338063, + 473563324 + ], + "samples_ts": [ + 271.177, + 270.42, + 270.291 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T21:41:33Z", + "avg_ns": 14042868853, + "stddev_ns": 52734976, + "avg_ts": 36.46013, + "stddev_ts": 0.137176, + "samples_ns": [ + 13982878244, + 14063820116, + 14081908201 + ], + "samples_ts": [ + 36.6162, + 36.4055, + 36.3587 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 125 + }, + { + "timestamp_utc": "2025-12-08T21:42:34.811555+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:42:16Z\",\n \"avg_ns\": 1962041324,\n \"stddev_ns\": 1851195,\n \"avg_ts\": 260.952865,\n \"stddev_ts\": 0.246226,\n \"samples_ns\": [ 1960150707, 1963850403, 1962122862 ],\n \"samples_ts\": [ 261.204, 260.712, 260.942 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:42:24Z\",\n \"avg_ns\": 3397784758,\n \"stddev_ns\": 64955245,\n \"avg_ts\": 37.680886,\n \"stddev_ts\": 0.728380,\n \"samples_ns\": [ 3322783218, 3435792728, 3434778329 ],\n \"samples_ts\": [ 38.5219, 37.2549, 37.2659 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:42:16Z", + "avg_ns": 1962041324, + "stddev_ns": 1851195, + "avg_ts": 260.952865, + "stddev_ts": 0.246226, + "samples_ns": [ + 1960150707, + 1963850403, + 1962122862 + ], + "samples_ts": [ + 261.204, + 260.712, + 260.942 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T21:42:24Z", + "avg_ns": 3397784758, + "stddev_ns": 64955245, + "avg_ts": 37.680886, + "stddev_ts": 0.72838, + "samples_ns": [ + 3322783218, + 3435792728, + 3434778329 + ], + "samples_ts": [ + 38.5219, + 37.2549, + 37.2659 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 126 + }, + { + "timestamp_utc": "2025-12-08T21:43:25.405880+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:42:35Z\",\n \"avg_ns\": 1959691577,\n \"stddev_ns\": 3365977,\n \"avg_ts\": 261.266116,\n \"stddev_ts\": 0.448418,\n \"samples_ns\": [ 1956891248, 1963425508, 1958757976 ],\n \"samples_ts\": [ 261.639, 260.769, 261.39 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:42:43Z\",\n \"avg_ns\": 13984182371,\n \"stddev_ns\": 109475959,\n \"avg_ts\": 36.614296,\n \"stddev_ts\": 0.287706,\n \"samples_ns\": [ 13860366665, 14068163393, 14024017056 ],\n \"samples_ts\": [ 36.9399, 36.3942, 36.5088 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:42:35Z", + "avg_ns": 1959691577, + "stddev_ns": 3365977, + "avg_ts": 261.266116, + "stddev_ts": 0.448418, + "samples_ns": [ + 1956891248, + 1963425508, + 1958757976 + ], + "samples_ts": [ + 261.639, + 260.769, + 261.39 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T21:42:43Z", + "avg_ns": 13984182371, + "stddev_ns": 109475959, + "avg_ts": 36.614296, + "stddev_ts": 0.287706, + "samples_ns": [ + 13860366665, + 14068163393, + 14024017056 + ], + "samples_ts": [ + 36.9399, + 36.3942, + 36.5088 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 127 + }, + { + "timestamp_utc": "2025-12-08T21:43:38.377503+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:43:26Z\",\n \"avg_ns\": 472515124,\n \"stddev_ns\": 311095,\n \"avg_ts\": 270.890878,\n \"stddev_ts\": 0.178342,\n \"samples_ns\": [ 472501266, 472832917, 472211189 ],\n \"samples_ts\": [ 270.899, 270.709, 271.065 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:43:27Z\",\n \"avg_ns\": 3423665683,\n \"stddev_ns\": 57128957,\n \"avg_ts\": 37.393837,\n \"stddev_ts\": 0.630039,\n \"samples_ns\": [ 3357701568, 3457160070, 3456135413 ],\n \"samples_ts\": [ 38.1213, 37.0246, 37.0356 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:43:26Z", + "avg_ns": 472515124, + "stddev_ns": 311095, + "avg_ts": 270.890878, + "stddev_ts": 0.178342, + "samples_ns": [ + 472501266, + 472832917, + 472211189 + ], + "samples_ts": [ + 270.899, + 270.709, + 271.065 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T21:43:27Z", + "avg_ns": 3423665683, + "stddev_ns": 57128957, + "avg_ts": 37.393837, + "stddev_ts": 0.630039, + "samples_ns": [ + 3357701568, + 3457160070, + 3456135413 + ], + "samples_ts": [ + 38.1213, + 37.0246, + 37.0356 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 128 + }, + { + "timestamp_utc": "2025-12-08T21:44:22.835130+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:43:39Z\",\n \"avg_ns\": 473152486,\n \"stddev_ns\": 454264,\n \"avg_ts\": 270.526062,\n \"stddev_ts\": 0.259584,\n \"samples_ns\": [ 472872947, 473676638, 472907873 ],\n \"samples_ts\": [ 270.686, 270.227, 270.666 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:43:40Z\",\n \"avg_ns\": 13921777851,\n \"stddev_ns\": 56132013,\n \"avg_ts\": 36.777311,\n \"stddev_ts\": 0.148588,\n \"samples_ns\": [ 13857862719, 13944411743, 13963059092 ],\n \"samples_ts\": [ 36.9465, 36.7172, 36.6682 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:43:39Z", + "avg_ns": 473152486, + "stddev_ns": 454264, + "avg_ts": 270.526062, + "stddev_ts": 0.259584, + "samples_ns": [ + 472872947, + 473676638, + 472907873 + ], + "samples_ts": [ + 270.686, + 270.227, + 270.666 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T21:43:40Z", + "avg_ns": 13921777851, + "stddev_ns": 56132013, + "avg_ts": 36.777311, + "stddev_ts": 0.148588, + "samples_ns": [ + 13857862719, + 13944411743, + 13963059092 + ], + "samples_ts": [ + 36.9465, + 36.7172, + 36.6682 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 129 + }, + { + "timestamp_utc": "2025-12-08T21:44:42.315942+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:44:23Z\",\n \"avg_ns\": 2124024057,\n \"stddev_ns\": 2262156,\n \"avg_ts\": 241.052066,\n \"stddev_ts\": 0.256653,\n \"samples_ns\": [ 2126344363, 2121825944, 2123901865 ],\n \"samples_ts\": [ 240.789, 241.302, 241.066 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:44:32Z\",\n \"avg_ns\": 3389563409,\n \"stddev_ns\": 73083651,\n \"avg_ts\": 37.774821,\n \"stddev_ts\": 0.824075,\n \"samples_ns\": [ 3305805796, 3422514177, 3440370256 ],\n \"samples_ts\": [ 38.7198, 37.3994, 37.2053 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:44:23Z", + "avg_ns": 2124024057, + "stddev_ns": 2262156, + "avg_ts": 241.052066, + "stddev_ts": 0.256653, + "samples_ns": [ + 2126344363, + 2121825944, + 2123901865 + ], + "samples_ts": [ + 240.789, + 241.302, + 241.066 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T21:44:32Z", + "avg_ns": 3389563409, + "stddev_ns": 73083651, + "avg_ts": 37.774821, + "stddev_ts": 0.824075, + "samples_ns": [ + 3305805796, + 3422514177, + 3440370256 + ], + "samples_ts": [ + 38.7198, + 37.3994, + 37.2053 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 130 + }, + { + "timestamp_utc": "2025-12-08T21:45:33.471087+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:44:43Z\",\n \"avg_ns\": 2099559505,\n \"stddev_ns\": 4797241,\n \"avg_ts\": 243.861524,\n \"stddev_ts\": 0.556550,\n \"samples_ns\": [ 2105000186, 2097739537, 2095938793 ],\n \"samples_ts\": [ 243.23, 244.072, 244.282 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:44:51Z\",\n \"avg_ns\": 13982969198,\n \"stddev_ns\": 95423452,\n \"avg_ts\": 36.617112,\n \"stddev_ts\": 0.250819,\n \"samples_ns\": [ 13873467317, 14048333285, 14027106992 ],\n \"samples_ts\": [ 36.905, 36.4456, 36.5008 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:44:43Z", + "avg_ns": 2099559505, + "stddev_ns": 4797241, + "avg_ts": 243.861524, + "stddev_ts": 0.55655, + "samples_ns": [ + 2105000186, + 2097739537, + 2095938793 + ], + "samples_ts": [ + 243.23, + 244.072, + 244.282 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T21:44:51Z", + "avg_ns": 13982969198, + "stddev_ns": 95423452, + "avg_ts": 36.617112, + "stddev_ts": 0.250819, + "samples_ns": [ + 13873467317, + 14048333285, + 14027106992 + ], + "samples_ts": [ + 36.905, + 36.4456, + 36.5008 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 131 + }, + { + "timestamp_utc": "2025-12-08T21:45:46.435318+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:45:34Z\",\n \"avg_ns\": 472959738,\n \"stddev_ns\": 941764,\n \"avg_ts\": 270.636860,\n \"stddev_ts\": 0.539133,\n \"samples_ns\": [ 473100826, 473822999, 471955389 ],\n \"samples_ts\": [ 270.555, 270.143, 271.212 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:45:36Z\",\n \"avg_ns\": 3425115663,\n \"stddev_ns\": 50388790,\n \"avg_ts\": 37.376434,\n \"stddev_ts\": 0.553961,\n \"samples_ns\": [ 3367805980, 3462472630, 3445068380 ],\n \"samples_ts\": [ 38.0069, 36.9678, 37.1546 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:45:34Z", + "avg_ns": 472959738, + "stddev_ns": 941764, + "avg_ts": 270.63686, + "stddev_ts": 0.539133, + "samples_ns": [ + 473100826, + 473822999, + 471955389 + ], + "samples_ts": [ + 270.555, + 270.143, + 271.212 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T21:45:36Z", + "avg_ns": 3425115663, + "stddev_ns": 50388790, + "avg_ts": 37.376434, + "stddev_ts": 0.553961, + "samples_ns": [ + 3367805980, + 3462472630, + 3445068380 + ], + "samples_ts": [ + 38.0069, + 36.9678, + 37.1546 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 132 + }, + { + "timestamp_utc": "2025-12-08T21:46:31.272406+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:45:47Z\",\n \"avg_ns\": 472844619,\n \"stddev_ns\": 593967,\n \"avg_ts\": 270.702318,\n \"stddev_ts\": 0.339892,\n \"samples_ns\": [ 472692538, 473499841, 472341478 ],\n \"samples_ts\": [ 270.789, 270.327, 270.99 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:45:49Z\",\n \"avg_ns\": 14048287399,\n \"stddev_ns\": 65913155,\n \"avg_ts\": 36.446260,\n \"stddev_ts\": 0.171458,\n \"samples_ns\": [ 13972322920, 14090339574, 14082199705 ],\n \"samples_ts\": [ 36.6439, 36.337, 36.358 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:45:47Z", + "avg_ns": 472844619, + "stddev_ns": 593967, + "avg_ts": 270.702318, + "stddev_ts": 0.339892, + "samples_ns": [ + 472692538, + 473499841, + 472341478 + ], + "samples_ts": [ + 270.789, + 270.327, + 270.99 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T21:45:49Z", + "avg_ns": 14048287399, + "stddev_ns": 65913155, + "avg_ts": 36.44626, + "stddev_ts": 0.171458, + "samples_ns": [ + 13972322920, + 14090339574, + 14082199705 + ], + "samples_ts": [ + 36.6439, + 36.337, + 36.358 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 133 + }, + { + "timestamp_utc": "2025-12-08T21:46:50.048556+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:46:31Z\",\n \"avg_ns\": 1940666402,\n \"stddev_ns\": 2220568,\n \"avg_ts\": 263.827130,\n \"stddev_ts\": 0.301623,\n \"samples_ns\": [ 1939271492, 1939501157, 1943226558 ],\n \"samples_ts\": [ 264.017, 263.985, 263.479 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:46:39Z\",\n \"avg_ns\": 3403626371,\n \"stddev_ns\": 68964006,\n \"avg_ts\": 37.617362,\n \"stddev_ts\": 0.770944,\n \"samples_ns\": [ 3324273820, 3437522785, 3449082510 ],\n \"samples_ts\": [ 38.5047, 37.2361, 37.1113 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:46:31Z", + "avg_ns": 1940666402, + "stddev_ns": 2220568, + "avg_ts": 263.82713, + "stddev_ts": 0.301623, + "samples_ns": [ + 1939271492, + 1939501157, + 1943226558 + ], + "samples_ts": [ + 264.017, + 263.985, + 263.479 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T21:46:39Z", + "avg_ns": 3403626371, + "stddev_ns": 68964006, + "avg_ts": 37.617362, + "stddev_ts": 0.770944, + "samples_ns": [ + 3324273820, + 3437522785, + 3449082510 + ], + "samples_ts": [ + 38.5047, + 37.2361, + 37.1113 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 134 + }, + { + "timestamp_utc": "2025-12-08T21:47:40.438423+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:46:50Z\",\n \"avg_ns\": 1924382167,\n \"stddev_ns\": 3827927,\n \"avg_ts\": 266.060119,\n \"stddev_ts\": 0.528811,\n \"samples_ns\": [ 1921103085, 1923455604, 1928587814 ],\n \"samples_ts\": [ 266.514, 266.188, 265.479 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:46:58Z\",\n \"avg_ns\": 13962419575,\n \"stddev_ns\": 44877260,\n \"avg_ts\": 36.670115,\n \"stddev_ts\": 0.117998,\n \"samples_ns\": [ 13912965009, 13973743914, 14000549803 ],\n \"samples_ts\": [ 36.8002, 36.6401, 36.57 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:46:50Z", + "avg_ns": 1924382167, + "stddev_ns": 3827927, + "avg_ts": 266.060119, + "stddev_ts": 0.528811, + "samples_ns": [ + 1921103085, + 1923455604, + 1928587814 + ], + "samples_ts": [ + 266.514, + 266.188, + 265.479 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T21:46:58Z", + "avg_ns": 13962419575, + "stddev_ns": 44877260, + "avg_ts": 36.670115, + "stddev_ts": 0.117998, + "samples_ns": [ + 13912965009, + 13973743914, + 14000549803 + ], + "samples_ts": [ + 36.8002, + 36.6401, + 36.57 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 135 + }, + { + "timestamp_utc": "2025-12-08T21:47:53.388566+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:47:41Z\",\n \"avg_ns\": 473106841,\n \"stddev_ns\": 651441,\n \"avg_ts\": 270.552337,\n \"stddev_ts\": 0.372417,\n \"samples_ns\": [ 473182805, 473716606, 472421113 ],\n \"samples_ts\": [ 270.509, 270.204, 270.945 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:47:43Z\",\n \"avg_ns\": 3419979192,\n \"stddev_ns\": 49098943,\n \"avg_ts\": 37.432314,\n \"stddev_ts\": 0.541867,\n \"samples_ns\": [ 3363315174, 3446701489, 3449920915 ],\n \"samples_ts\": [ 38.0577, 37.137, 37.1023 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:47:41Z", + "avg_ns": 473106841, + "stddev_ns": 651441, + "avg_ts": 270.552337, + "stddev_ts": 0.372417, + "samples_ns": [ + 473182805, + 473716606, + 472421113 + ], + "samples_ts": [ + 270.509, + 270.204, + 270.945 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T21:47:43Z", + "avg_ns": 3419979192, + "stddev_ns": 49098943, + "avg_ts": 37.432314, + "stddev_ts": 0.541867, + "samples_ns": [ + 3363315174, + 3446701489, + 3449920915 + ], + "samples_ts": [ + 38.0577, + 37.137, + 37.1023 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 136 + }, + { + "timestamp_utc": "2025-12-08T21:48:38.220961+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:47:54Z\",\n \"avg_ns\": 472583109,\n \"stddev_ns\": 183122,\n \"avg_ts\": 270.851857,\n \"stddev_ts\": 0.104189,\n \"samples_ns\": [ 472455127, 472791241, 472502960 ],\n \"samples_ts\": [ 270.925, 270.733, 270.898 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:47:55Z\",\n \"avg_ns\": 14047652041,\n \"stddev_ns\": 40239255,\n \"avg_ts\": 36.447571,\n \"stddev_ts\": 0.104238,\n \"samples_ns\": [ 14020673248, 14028380085, 14093902790 ],\n \"samples_ts\": [ 36.5175, 36.4974, 36.3278 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:47:54Z", + "avg_ns": 472583109, + "stddev_ns": 183122, + "avg_ts": 270.851857, + "stddev_ts": 0.104189, + "samples_ns": [ + 472455127, + 472791241, + 472502960 + ], + "samples_ts": [ + 270.925, + 270.733, + 270.898 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T21:47:55Z", + "avg_ns": 14047652041, + "stddev_ns": 40239255, + "avg_ts": 36.447571, + "stddev_ts": 0.104238, + "samples_ns": [ + 14020673248, + 14028380085, + 14093902790 + ], + "samples_ts": [ + 36.5175, + 36.4974, + 36.3278 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 137 + }, + { + "timestamp_utc": "2025-12-08T21:48:57.052066+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:48:38Z\",\n \"avg_ns\": 1959070334,\n \"stddev_ns\": 1800020,\n \"avg_ts\": 261.348599,\n \"stddev_ts\": 0.240089,\n \"samples_ns\": [ 1960778569, 1959240700, 1957191734 ],\n \"samples_ts\": [ 261.121, 261.326, 261.599 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:48:46Z\",\n \"avg_ns\": 3387484431,\n \"stddev_ns\": 73015738,\n \"avg_ts\": 37.798005,\n \"stddev_ts\": 0.824905,\n \"samples_ns\": [ 3303248656, 3426512776, 3432691862 ],\n \"samples_ts\": [ 38.7497, 37.3558, 37.2885 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:48:38Z", + "avg_ns": 1959070334, + "stddev_ns": 1800020, + "avg_ts": 261.348599, + "stddev_ts": 0.240089, + "samples_ns": [ + 1960778569, + 1959240700, + 1957191734 + ], + "samples_ts": [ + 261.121, + 261.326, + 261.599 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T21:48:46Z", + "avg_ns": 3387484431, + "stddev_ns": 73015738, + "avg_ts": 37.798005, + "stddev_ts": 0.824905, + "samples_ns": [ + 3303248656, + 3426512776, + 3432691862 + ], + "samples_ts": [ + 38.7497, + 37.3558, + 37.2885 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 138 + }, + { + "timestamp_utc": "2025-12-08T21:49:47.614359+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:48:57Z\",\n \"avg_ns\": 1961089948,\n \"stddev_ns\": 2301143,\n \"avg_ts\": 261.079544,\n \"stddev_ts\": 0.306076,\n \"samples_ns\": [ 1960312870, 1959278840, 1963678136 ],\n \"samples_ts\": [ 261.183, 261.321, 260.735 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:49:05Z\",\n \"avg_ns\": 13973516072,\n \"stddev_ns\": 28687018,\n \"avg_ts\": 36.640845,\n \"stddev_ts\": 0.075221,\n \"samples_ns\": [ 13944869379, 13973436077, 14002242761 ],\n \"samples_ts\": [ 36.716, 36.641, 36.5656 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:48:57Z", + "avg_ns": 1961089948, + "stddev_ns": 2301143, + "avg_ts": 261.079544, + "stddev_ts": 0.306076, + "samples_ns": [ + 1960312870, + 1959278840, + 1963678136 + ], + "samples_ts": [ + 261.183, + 261.321, + 260.735 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T21:49:05Z", + "avg_ns": 13973516072, + "stddev_ns": 28687018, + "avg_ts": 36.640845, + "stddev_ts": 0.075221, + "samples_ns": [ + 13944869379, + 13973436077, + 14002242761 + ], + "samples_ts": [ + 36.716, + 36.641, + 36.5656 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 139 + }, + { + "timestamp_utc": "2025-12-08T21:50:00.611210+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:49:48Z\",\n \"avg_ns\": 472656785,\n \"stddev_ns\": 989989,\n \"avg_ts\": 270.810402,\n \"stddev_ts\": 0.566845,\n \"samples_ns\": [ 473673301, 471696691, 472600365 ],\n \"samples_ts\": [ 270.228, 271.361, 270.842 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:49:50Z\",\n \"avg_ns\": 3434121106,\n \"stddev_ns\": 49560230,\n \"avg_ts\": 37.278220,\n \"stddev_ts\": 0.542464,\n \"samples_ns\": [ 3376957605, 3460365432, 3465040282 ],\n \"samples_ts\": [ 37.9039, 36.9903, 36.9404 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:49:48Z", + "avg_ns": 472656785, + "stddev_ns": 989989, + "avg_ts": 270.810402, + "stddev_ts": 0.566845, + "samples_ns": [ + 473673301, + 471696691, + 472600365 + ], + "samples_ts": [ + 270.228, + 271.361, + 270.842 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T21:49:50Z", + "avg_ns": 3434121106, + "stddev_ns": 49560230, + "avg_ts": 37.27822, + "stddev_ts": 0.542464, + "samples_ns": [ + 3376957605, + 3460365432, + 3465040282 + ], + "samples_ts": [ + 37.9039, + 36.9903, + 36.9404 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 140 + }, + { + "timestamp_utc": "2025-12-08T21:50:45.653575+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:50:01Z\",\n \"avg_ns\": 472370038,\n \"stddev_ns\": 1280679,\n \"avg_ts\": 270.975331,\n \"stddev_ts\": 0.735291,\n \"samples_ns\": [ 473448129, 470954661, 472707325 ],\n \"samples_ts\": [ 270.357, 271.788, 270.781 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:50:03Z\",\n \"avg_ns\": 14113972922,\n \"stddev_ns\": 57762461,\n \"avg_ts\": 36.276513,\n \"stddev_ts\": 0.148646,\n \"samples_ns\": [ 14051171598, 14125918580, 14164828588 ],\n \"samples_ts\": [ 36.4382, 36.2454, 36.1459 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:50:01Z", + "avg_ns": 472370038, + "stddev_ns": 1280679, + "avg_ts": 270.975331, + "stddev_ts": 0.735291, + "samples_ns": [ + 473448129, + 470954661, + 472707325 + ], + "samples_ts": [ + 270.357, + 271.788, + 270.781 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T21:50:03Z", + "avg_ns": 14113972922, + "stddev_ns": 57762461, + "avg_ts": 36.276513, + "stddev_ts": 0.148646, + "samples_ns": [ + 14051171598, + 14125918580, + 14164828588 + ], + "samples_ts": [ + 36.4382, + 36.2454, + 36.1459 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 141 + }, + { + "timestamp_utc": "2025-12-08T21:51:04.929843+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:50:46Z\",\n \"avg_ns\": 2073749664,\n \"stddev_ns\": 2969593,\n \"avg_ts\": 246.896098,\n \"stddev_ts\": 0.353742,\n \"samples_ns\": [ 2074545044, 2076240575, 2070463373 ],\n \"samples_ts\": [ 246.801, 246.6, 247.288 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:50:54Z\",\n \"avg_ns\": 3387569226,\n \"stddev_ns\": 51313820,\n \"avg_ts\": 37.791036,\n \"stddev_ts\": 0.577366,\n \"samples_ns\": [ 3328490586, 3421031622, 3413185471 ],\n \"samples_ts\": [ 38.4559, 37.4156, 37.5016 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:50:46Z", + "avg_ns": 2073749664, + "stddev_ns": 2969593, + "avg_ts": 246.896098, + "stddev_ts": 0.353742, + "samples_ns": [ + 2074545044, + 2076240575, + 2070463373 + ], + "samples_ts": [ + 246.801, + 246.6, + 247.288 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T21:50:54Z", + "avg_ns": 3387569226, + "stddev_ns": 51313820, + "avg_ts": 37.791036, + "stddev_ts": 0.577366, + "samples_ns": [ + 3328490586, + 3421031622, + 3413185471 + ], + "samples_ts": [ + 38.4559, + 37.4156, + 37.5016 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 142 + }, + { + "timestamp_utc": "2025-12-08T21:51:56.089420+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:51:05Z\",\n \"avg_ns\": 2065794507,\n \"stddev_ns\": 2369173,\n \"avg_ts\": 247.846747,\n \"stddev_ts\": 0.284107,\n \"samples_ns\": [ 2063883795, 2068445407, 2065054319 ],\n \"samples_ts\": [ 248.076, 247.529, 247.935 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 270M Q4_K - Medium\",\n \"model_size\": 246587904,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:51:13Z\",\n \"avg_ns\": 14028467314,\n \"stddev_ns\": 61347538,\n \"avg_ts\": 36.497682,\n \"stddev_ts\": 0.159983,\n \"samples_ns\": [ 13958181865, 14055964181, 14071255898 ],\n \"samples_ts\": [ 36.681, 36.4258, 36.3862 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:51:05Z", + "avg_ns": 2065794507, + "stddev_ns": 2369173, + "avg_ts": 247.846747, + "stddev_ts": 0.284107, + "samples_ns": [ + 2063883795, + 2068445407, + 2065054319 + ], + "samples_ts": [ + 248.076, + 247.529, + 247.935 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_type": "gemma3 270M Q4_K - Medium", + "model_size": 246587904, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T21:51:13Z", + "avg_ns": 14028467314, + "stddev_ns": 61347538, + "avg_ts": 36.497682, + "stddev_ts": 0.159983, + "samples_ns": [ + 13958181865, + 14055964181, + 14071255898 + ], + "samples_ts": [ + 36.681, + 36.4258, + 36.3862 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 143 + }, + { + "timestamp_utc": "2025-12-08T21:52:15.723824+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:52:00Z\",\n \"avg_ns\": 1086425396,\n \"stddev_ns\": 201664,\n \"avg_ts\": 117.817572,\n \"stddev_ts\": 0.021276,\n \"samples_ns\": [ 1086367656, 1086643991, 1086264543 ],\n \"samples_ts\": [ 117.824, 117.794, 117.835 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:52:04Z\",\n \"avg_ns\": 3721992773,\n \"stddev_ns\": 322151,\n \"avg_ts\": 34.390180,\n \"stddev_ts\": 0.002923,\n \"samples_ns\": [ 3722266054, 3722066009, 3721646257 ],\n \"samples_ts\": [ 34.3877, 34.3895, 34.3934 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:52:00Z", + "avg_ns": 1086425396, + "stddev_ns": 201664, + "avg_ts": 117.817572, + "stddev_ts": 0.021276, + "samples_ns": [ + 1086367656, + 1086643991, + 1086264543 + ], + "samples_ts": [ + 117.824, + 117.794, + 117.835 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T21:52:04Z", + "avg_ns": 3721992773, + "stddev_ns": 322151, + "avg_ts": 34.39018, + "stddev_ts": 0.002923, + "samples_ns": [ + 3722266054, + 3722066009, + 3721646257 + ], + "samples_ts": [ + 34.3877, + 34.3895, + 34.3934 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 144 + }, + { + "timestamp_utc": "2025-12-08T21:53:07.092812+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:52:16Z\",\n \"avg_ns\": 1087206228,\n \"stddev_ns\": 104540,\n \"avg_ts\": 117.732954,\n \"stddev_ts\": 0.011321,\n \"samples_ns\": [ 1087308248, 1087099338, 1087211098 ],\n \"samples_ts\": [ 117.722, 117.745, 117.732 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:52:20Z\",\n \"avg_ns\": 15395769689,\n \"stddev_ns\": 6380086,\n \"avg_ts\": 33.255892,\n \"stddev_ts\": 0.013784,\n \"samples_ns\": [ 15401081936, 15388693099, 15397534032 ],\n \"samples_ts\": [ 33.2444, 33.2712, 33.2521 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:52:16Z", + "avg_ns": 1087206228, + "stddev_ns": 104540, + "avg_ts": 117.732954, + "stddev_ts": 0.011321, + "samples_ns": [ + 1087308248, + 1087099338, + 1087211098 + ], + "samples_ts": [ + 117.722, + 117.745, + 117.732 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T21:52:20Z", + "avg_ns": 15395769689, + "stddev_ns": 6380086, + "avg_ts": 33.255892, + "stddev_ts": 0.013784, + "samples_ns": [ + 15401081936, + 15388693099, + 15397534032 + ], + "samples_ts": [ + 33.2444, + 33.2712, + 33.2521 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 145 + }, + { + "timestamp_utc": "2025-12-08T21:53:37.466148+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:53:07Z\",\n \"avg_ns\": 4542619115,\n \"stddev_ns\": 351969,\n \"avg_ts\": 112.710309,\n \"stddev_ts\": 0.008572,\n \"samples_ns\": [ 4542905547, 4542716332, 4542235467 ],\n \"samples_ts\": [ 112.703, 112.708, 112.72 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:53:25Z\",\n \"avg_ns\": 3805891497,\n \"stddev_ns\": 3752215,\n \"avg_ts\": 33.632089,\n \"stddev_ts\": 0.033134,\n \"samples_ns\": [ 3803691478, 3810223419, 3803759595 ],\n \"samples_ts\": [ 33.6515, 33.5938, 33.6509 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:53:07Z", + "avg_ns": 4542619115, + "stddev_ns": 351969, + "avg_ts": 112.710309, + "stddev_ts": 0.008572, + "samples_ns": [ + 4542905547, + 4542716332, + 4542235467 + ], + "samples_ts": [ + 112.703, + 112.708, + 112.72 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T21:53:25Z", + "avg_ns": 3805891497, + "stddev_ns": 3752215, + "avg_ts": 33.632089, + "stddev_ts": 0.033134, + "samples_ns": [ + 3803691478, + 3810223419, + 3803759595 + ], + "samples_ts": [ + 33.6515, + 33.5938, + 33.6509 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 146 + }, + { + "timestamp_utc": "2025-12-08T21:54:42.763249+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:53:38Z\",\n \"avg_ns\": 4543284506,\n \"stddev_ns\": 778774,\n \"avg_ts\": 112.693803,\n \"stddev_ts\": 0.019171,\n \"samples_ns\": [ 4543052377, 4542654254, 4544146889 ],\n \"samples_ts\": [ 112.7, 112.709, 112.672 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:53:56Z\",\n \"avg_ns\": 15445071260,\n \"stddev_ns\": 40062574,\n \"avg_ts\": 33.149883,\n \"stddev_ts\": 0.086115,\n \"samples_ns\": [ 15398823538, 15469120093, 15467270150 ],\n \"samples_ts\": [ 33.2493, 33.0982, 33.1022 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:53:38Z", + "avg_ns": 4543284506, + "stddev_ns": 778774, + "avg_ts": 112.693803, + "stddev_ts": 0.019171, + "samples_ns": [ + 4543052377, + 4542654254, + 4544146889 + ], + "samples_ts": [ + 112.7, + 112.709, + 112.672 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T21:53:56Z", + "avg_ns": 15445071260, + "stddev_ns": 40062574, + "avg_ts": 33.149883, + "stddev_ts": 0.086115, + "samples_ns": [ + 15398823538, + 15469120093, + 15467270150 + ], + "samples_ts": [ + 33.2493, + 33.0982, + 33.1022 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 147 + }, + { + "timestamp_utc": "2025-12-08T21:54:59.018311+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:54:43Z\",\n \"avg_ns\": 1086727154,\n \"stddev_ns\": 299756,\n \"avg_ts\": 117.784861,\n \"stddev_ts\": 0.032097,\n \"samples_ns\": [ 1086396689, 1086816396, 1086968379 ],\n \"samples_ts\": [ 117.821, 117.775, 117.759 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:54:47Z\",\n \"avg_ns\": 3709799123,\n \"stddev_ns\": 1864185,\n \"avg_ts\": 34.503222,\n \"stddev_ts\": 0.017333,\n \"samples_ns\": [ 3707662251, 3710651199, 3711083920 ],\n \"samples_ts\": [ 34.5231, 34.4953, 34.4913 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:54:43Z", + "avg_ns": 1086727154, + "stddev_ns": 299756, + "avg_ts": 117.784861, + "stddev_ts": 0.032097, + "samples_ns": [ + 1086396689, + 1086816396, + 1086968379 + ], + "samples_ts": [ + 117.821, + 117.775, + 117.759 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T21:54:47Z", + "avg_ns": 3709799123, + "stddev_ns": 1864185, + "avg_ts": 34.503222, + "stddev_ts": 0.017333, + "samples_ns": [ + 3707662251, + 3710651199, + 3711083920 + ], + "samples_ts": [ + 34.5231, + 34.4953, + 34.4913 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 148 + }, + { + "timestamp_utc": "2025-12-08T21:55:50.883418+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:54:59Z\",\n \"avg_ns\": 1087401346,\n \"stddev_ns\": 65150,\n \"avg_ts\": 117.711828,\n \"stddev_ts\": 0.007053,\n \"samples_ns\": [ 1087398729, 1087337543, 1087467766 ],\n \"samples_ts\": [ 117.712, 117.719, 117.705 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:55:04Z\",\n \"avg_ns\": 15577493417,\n \"stddev_ns\": 16873210,\n \"avg_ts\": 32.867958,\n \"stddev_ts\": 0.035622,\n \"samples_ns\": [ 15558206088, 15584748685, 15589525478 ],\n \"samples_ts\": [ 32.9087, 32.8526, 32.8426 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:54:59Z", + "avg_ns": 1087401346, + "stddev_ns": 65150, + "avg_ts": 117.711828, + "stddev_ts": 0.007053, + "samples_ns": [ + 1087398729, + 1087337543, + 1087467766 + ], + "samples_ts": [ + 117.712, + 117.719, + 117.705 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T21:55:04Z", + "avg_ns": 15577493417, + "stddev_ns": 16873210, + "avg_ts": 32.867958, + "stddev_ts": 0.035622, + "samples_ns": [ + 15558206088, + 15584748685, + 15589525478 + ], + "samples_ts": [ + 32.9087, + 32.8526, + 32.8426 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 149 + }, + { + "timestamp_utc": "2025-12-08T21:56:21.036788+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:55:51Z\",\n \"avg_ns\": 4550810653,\n \"stddev_ns\": 297999,\n \"avg_ts\": 112.507428,\n \"stddev_ts\": 0.006980,\n \"samples_ns\": [ 4551075150, 4550513380, 4550843431 ],\n \"samples_ts\": [ 112.501, 112.515, 112.507 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:56:09Z\",\n \"avg_ns\": 3721871362,\n \"stddev_ns\": 644953,\n \"avg_ts\": 34.391302,\n \"stddev_ts\": 0.005959,\n \"samples_ns\": [ 3722613910, 3721549396, 3721450780 ],\n \"samples_ts\": [ 34.3844, 34.3943, 34.3952 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:55:51Z", + "avg_ns": 4550810653, + "stddev_ns": 297999, + "avg_ts": 112.507428, + "stddev_ts": 0.00698, + "samples_ns": [ + 4551075150, + 4550513380, + 4550843431 + ], + "samples_ts": [ + 112.501, + 112.515, + 112.507 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T21:56:09Z", + "avg_ns": 3721871362, + "stddev_ns": 644953, + "avg_ts": 34.391302, + "stddev_ts": 0.005959, + "samples_ns": [ + 3722613910, + 3721549396, + 3721450780 + ], + "samples_ts": [ + 34.3844, + 34.3943, + 34.3952 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 150 + }, + { + "timestamp_utc": "2025-12-08T21:57:27.118630+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:56:21Z\",\n \"avg_ns\": 4556608958,\n \"stddev_ns\": 203722,\n \"avg_ts\": 112.364261,\n \"stddev_ts\": 0.004438,\n \"samples_ns\": [ 4556410710, 4556654113, 4556762053 ],\n \"samples_ts\": [ 112.369, 112.363, 112.36 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:56:39Z\",\n \"avg_ns\": 15688529647,\n \"stddev_ns\": 10692741,\n \"avg_ts\": 32.635318,\n \"stddev_ts\": 0.022249,\n \"samples_ns\": [ 15676184547, 15694655084, 15694749312 ],\n \"samples_ts\": [ 32.661, 32.6226, 32.6224 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:56:21Z", + "avg_ns": 4556608958, + "stddev_ns": 203722, + "avg_ts": 112.364261, + "stddev_ts": 0.004438, + "samples_ns": [ + 4556410710, + 4556654113, + 4556762053 + ], + "samples_ts": [ + 112.369, + 112.363, + 112.36 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T21:56:39Z", + "avg_ns": 15688529647, + "stddev_ns": 10692741, + "avg_ts": 32.635318, + "stddev_ts": 0.022249, + "samples_ns": [ + 15676184547, + 15694655084, + 15694749312 + ], + "samples_ts": [ + 32.661, + 32.6226, + 32.6224 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 151 + }, + { + "timestamp_utc": "2025-12-08T21:57:43.411931+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:57:27Z\",\n \"avg_ns\": 1085563809,\n \"stddev_ns\": 141007,\n \"avg_ts\": 117.911080,\n \"stddev_ts\": 0.014891,\n \"samples_ns\": [ 1085468753, 1085501698, 1085720977 ],\n \"samples_ts\": [ 117.921, 117.918, 117.894 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:57:32Z\",\n \"avg_ns\": 3722088818,\n \"stddev_ns\": 779869,\n \"avg_ts\": 34.389293,\n \"stddev_ts\": 0.007206,\n \"samples_ns\": [ 3722469967, 3721191674, 3722604813 ],\n \"samples_ts\": [ 34.3858, 34.3976, 34.3845 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:57:27Z", + "avg_ns": 1085563809, + "stddev_ns": 141007, + "avg_ts": 117.91108, + "stddev_ts": 0.014891, + "samples_ns": [ + 1085468753, + 1085501698, + 1085720977 + ], + "samples_ts": [ + 117.921, + 117.918, + 117.894 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T21:57:32Z", + "avg_ns": 3722088818, + "stddev_ns": 779869, + "avg_ts": 34.389293, + "stddev_ts": 0.007206, + "samples_ns": [ + 3722469967, + 3721191674, + 3722604813 + ], + "samples_ts": [ + 34.3858, + 34.3976, + 34.3845 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 152 + }, + { + "timestamp_utc": "2025-12-08T21:58:34.649490+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:57:44Z\",\n \"avg_ns\": 1086229119,\n \"stddev_ns\": 211788,\n \"avg_ts\": 117.838862,\n \"stddev_ts\": 0.022696,\n \"samples_ns\": [ 1086024179, 1086442350, 1086220829 ],\n \"samples_ts\": [ 117.861, 117.816, 117.84 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:57:48Z\",\n \"avg_ns\": 15357401400,\n \"stddev_ns\": 24725491,\n \"avg_ts\": 33.339031,\n \"stddev_ts\": 0.053724,\n \"samples_ns\": [ 15328919138, 15369942611, 15373342453 ],\n \"samples_ts\": [ 33.4009, 33.3118, 33.3044 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:57:44Z", + "avg_ns": 1086229119, + "stddev_ns": 211788, + "avg_ts": 117.838862, + "stddev_ts": 0.022696, + "samples_ns": [ + 1086024179, + 1086442350, + 1086220829 + ], + "samples_ts": [ + 117.861, + 117.816, + 117.84 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T21:57:48Z", + "avg_ns": 15357401400, + "stddev_ns": 24725491, + "avg_ts": 33.339031, + "stddev_ts": 0.053724, + "samples_ns": [ + 15328919138, + 15369942611, + 15373342453 + ], + "samples_ts": [ + 33.4009, + 33.3118, + 33.3044 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 153 + }, + { + "timestamp_utc": "2025-12-08T21:59:05.793110+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:58:35Z\",\n \"avg_ns\": 4800113579,\n \"stddev_ns\": 909470,\n \"avg_ts\": 106.664145,\n \"stddev_ts\": 0.020091,\n \"samples_ns\": [ 4800038029, 4801053162, 4799249548 ],\n \"samples_ts\": [ 106.666, 106.643, 106.683 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:58:54Z\",\n \"avg_ns\": 3719016912,\n \"stddev_ns\": 12770882,\n \"avg_ts\": 34.417968,\n \"stddev_ts\": 0.117954,\n \"samples_ns\": [ 3711290736, 3712002610, 3733757392 ],\n \"samples_ts\": [ 34.4893, 34.4827, 34.2818 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:58:35Z", + "avg_ns": 4800113579, + "stddev_ns": 909470, + "avg_ts": 106.664145, + "stddev_ts": 0.020091, + "samples_ns": [ + 4800038029, + 4801053162, + 4799249548 + ], + "samples_ts": [ + 106.666, + 106.643, + 106.683 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T21:58:54Z", + "avg_ns": 3719016912, + "stddev_ns": 12770882, + "avg_ts": 34.417968, + "stddev_ts": 0.117954, + "samples_ns": [ + 3711290736, + 3712002610, + 3733757392 + ], + "samples_ts": [ + 34.4893, + 34.4827, + 34.2818 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 154 + }, + { + "timestamp_utc": "2025-12-08T22:00:12.442953+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:59:06Z\",\n \"avg_ns\": 4865920273,\n \"stddev_ns\": 845940,\n \"avg_ts\": 105.221619,\n \"stddev_ts\": 0.018293,\n \"samples_ns\": [ 4865060240, 4866751379, 4865949200 ],\n \"samples_ts\": [ 105.24, 105.204, 105.221 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T21:59:25Z\",\n \"avg_ns\": 15465409013,\n \"stddev_ns\": 1925491,\n \"avg_ts\": 33.106141,\n \"stddev_ts\": 0.004122,\n \"samples_ns\": [ 15463983674, 15464643906, 15467599459 ],\n \"samples_ts\": [ 33.1092, 33.1078, 33.1015 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T21:59:06Z", + "avg_ns": 4865920273, + "stddev_ns": 845940, + "avg_ts": 105.221619, + "stddev_ts": 0.018293, + "samples_ns": [ + 4865060240, + 4866751379, + 4865949200 + ], + "samples_ts": [ + 105.24, + 105.204, + 105.221 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T21:59:25Z", + "avg_ns": 15465409013, + "stddev_ns": 1925491, + "avg_ts": 33.106141, + "stddev_ts": 0.004122, + "samples_ns": [ + 15463983674, + 15464643906, + 15467599459 + ], + "samples_ts": [ + 33.1092, + 33.1078, + 33.1015 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 155 + }, + { + "timestamp_utc": "2025-12-08T22:00:28.901247+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:00:13Z\",\n \"avg_ns\": 1086034722,\n \"stddev_ns\": 126017,\n \"avg_ts\": 117.859953,\n \"stddev_ts\": 0.012706,\n \"samples_ns\": [ 1085912160, 1086145422, 1086046586 ],\n \"samples_ts\": [ 117.873, 117.848, 117.859 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:00:17Z\",\n \"avg_ns\": 3775703011,\n \"stddev_ns\": 608919,\n \"avg_ts\": 33.900972,\n \"stddev_ts\": 0.005411,\n \"samples_ns\": [ 3776396045, 3775301625, 3775411365 ],\n \"samples_ts\": [ 33.8948, 33.9046, 33.9036 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:00:13Z", + "avg_ns": 1086034722, + "stddev_ns": 126017, + "avg_ts": 117.859953, + "stddev_ts": 0.012706, + "samples_ns": [ + 1085912160, + 1086145422, + 1086046586 + ], + "samples_ts": [ + 117.873, + 117.848, + 117.859 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T22:00:17Z", + "avg_ns": 3775703011, + "stddev_ns": 608919, + "avg_ts": 33.900972, + "stddev_ts": 0.005411, + "samples_ns": [ + 3776396045, + 3775301625, + 3775411365 + ], + "samples_ts": [ + 33.8948, + 33.9046, + 33.9036 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 156 + }, + { + "timestamp_utc": "2025-12-08T22:01:21.028642+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:00:29Z\",\n \"avg_ns\": 1086757337,\n \"stddev_ns\": 54323,\n \"avg_ts\": 117.781584,\n \"stddev_ts\": 0.005887,\n \"samples_ns\": [ 1086748109, 1086815683, 1086708219 ],\n \"samples_ts\": [ 117.783, 117.775, 117.787 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:00:33Z\",\n \"avg_ns\": 15665225284,\n \"stddev_ns\": 9029921,\n \"avg_ts\": 32.683865,\n \"stddev_ts\": 0.018842,\n \"samples_ns\": [ 15669117580, 15654903835, 15671654439 ],\n \"samples_ts\": [ 32.6757, 32.7054, 32.6704 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:00:29Z", + "avg_ns": 1086757337, + "stddev_ns": 54323, + "avg_ts": 117.781584, + "stddev_ts": 0.005887, + "samples_ns": [ + 1086748109, + 1086815683, + 1086708219 + ], + "samples_ts": [ + 117.783, + 117.775, + 117.787 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T22:00:33Z", + "avg_ns": 15665225284, + "stddev_ns": 9029921, + "avg_ts": 32.683865, + "stddev_ts": 0.018842, + "samples_ns": [ + 15669117580, + 15654903835, + 15671654439 + ], + "samples_ts": [ + 32.6757, + 32.7054, + 32.6704 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 157 + }, + { + "timestamp_utc": "2025-12-08T22:01:51.371763+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:01:21Z\",\n \"avg_ns\": 4544836670,\n \"stddev_ns\": 281526,\n \"avg_ts\": 112.655314,\n \"stddev_ts\": 0.006566,\n \"samples_ns\": [ 4545069386, 4544892214, 4544548412 ],\n \"samples_ts\": [ 112.65, 112.654, 112.662 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:01:39Z\",\n \"avg_ns\": 3791579029,\n \"stddev_ns\": 928484,\n \"avg_ts\": 33.759023,\n \"stddev_ts\": 0.008266,\n \"samples_ns\": [ 3790770043, 3792592816, 3791374228 ],\n \"samples_ts\": [ 33.7662, 33.75, 33.7608 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:01:21Z", + "avg_ns": 4544836670, + "stddev_ns": 281526, + "avg_ts": 112.655314, + "stddev_ts": 0.006566, + "samples_ns": [ + 4545069386, + 4544892214, + 4544548412 + ], + "samples_ts": [ + 112.65, + 112.654, + 112.662 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T22:01:39Z", + "avg_ns": 3791579029, + "stddev_ns": 928484, + "avg_ts": 33.759023, + "stddev_ts": 0.008266, + "samples_ns": [ + 3790770043, + 3792592816, + 3791374228 + ], + "samples_ts": [ + 33.7662, + 33.75, + 33.7608 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 158 + }, + { + "timestamp_utc": "2025-12-08T22:02:56.730142+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:01:52Z\",\n \"avg_ns\": 4556566400,\n \"stddev_ns\": 621507,\n \"avg_ts\": 112.365312,\n \"stddev_ts\": 0.015235,\n \"samples_ns\": [ 4557275379, 4556143162, 4556280660 ],\n \"samples_ts\": [ 112.348, 112.376, 112.372 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:02:10Z\",\n \"avg_ns\": 15433290729,\n \"stddev_ns\": 4334330,\n \"avg_ts\": 33.175039,\n \"stddev_ts\": 0.009310,\n \"samples_ns\": [ 15428545989, 15434295208, 15437030992 ],\n \"samples_ts\": [ 33.1852, 33.1729, 33.167 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:01:52Z", + "avg_ns": 4556566400, + "stddev_ns": 621507, + "avg_ts": 112.365312, + "stddev_ts": 0.015235, + "samples_ns": [ + 4557275379, + 4556143162, + 4556280660 + ], + "samples_ts": [ + 112.348, + 112.376, + 112.372 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T22:02:10Z", + "avg_ns": 15433290729, + "stddev_ns": 4334330, + "avg_ts": 33.175039, + "stddev_ts": 0.00931, + "samples_ns": [ + 15428545989, + 15434295208, + 15437030992 + ], + "samples_ts": [ + 33.1852, + 33.1729, + 33.167 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 159 + }, + { + "timestamp_utc": "2025-12-08T22:03:13.276718+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:02:57Z\",\n \"avg_ns\": 1086962380,\n \"stddev_ns\": 145609,\n \"avg_ts\": 117.759367,\n \"stddev_ts\": 0.015775,\n \"samples_ns\": [ 1086972177, 1087102844, 1086812119 ],\n \"samples_ts\": [ 117.758, 117.744, 117.776 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:03:01Z\",\n \"avg_ns\": 3789403432,\n \"stddev_ns\": 1503342,\n \"avg_ts\": 33.778408,\n \"stddev_ts\": 0.013400,\n \"samples_ns\": [ 3790973905, 3789258702, 3787977689 ],\n \"samples_ts\": [ 33.7644, 33.7797, 33.7911 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:02:57Z", + "avg_ns": 1086962380, + "stddev_ns": 145609, + "avg_ts": 117.759367, + "stddev_ts": 0.015775, + "samples_ns": [ + 1086972177, + 1087102844, + 1086812119 + ], + "samples_ts": [ + 117.758, + 117.744, + 117.776 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T22:03:01Z", + "avg_ns": 3789403432, + "stddev_ns": 1503342, + "avg_ts": 33.778408, + "stddev_ts": 0.0134, + "samples_ns": [ + 3790973905, + 3789258702, + 3787977689 + ], + "samples_ts": [ + 33.7644, + 33.7797, + 33.7911 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 160 + }, + { + "timestamp_utc": "2025-12-08T22:04:05.065485+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:03:13Z\",\n \"avg_ns\": 1087377770,\n \"stddev_ns\": 174423,\n \"avg_ts\": 117.714382,\n \"stddev_ts\": 0.018881,\n \"samples_ns\": [ 1087306140, 1087576605, 1087250565 ],\n \"samples_ts\": [ 117.722, 117.693, 117.728 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:03:18Z\",\n \"avg_ns\": 15551291299,\n \"stddev_ns\": 47305625,\n \"avg_ts\": 32.923514,\n \"stddev_ts\": 0.099976,\n \"samples_ns\": [ 15605872258, 15522126948, 15525874691 ],\n \"samples_ts\": [ 32.8082, 32.9852, 32.9772 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:03:13Z", + "avg_ns": 1087377770, + "stddev_ns": 174423, + "avg_ts": 117.714382, + "stddev_ts": 0.018881, + "samples_ns": [ + 1087306140, + 1087576605, + 1087250565 + ], + "samples_ts": [ + 117.722, + 117.693, + 117.728 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T22:03:18Z", + "avg_ns": 15551291299, + "stddev_ns": 47305625, + "avg_ts": 32.923514, + "stddev_ts": 0.099976, + "samples_ns": [ + 15605872258, + 15522126948, + 15525874691 + ], + "samples_ts": [ + 32.8082, + 32.9852, + 32.9772 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 161 + }, + { + "timestamp_utc": "2025-12-08T22:04:35.443318+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:04:05Z\",\n \"avg_ns\": 4576227433,\n \"stddev_ns\": 692770,\n \"avg_ts\": 111.882553,\n \"stddev_ts\": 0.016855,\n \"samples_ns\": [ 4576014496, 4576998242, 4575669562 ],\n \"samples_ts\": [ 111.888, 111.864, 111.896 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:04:24Z\",\n \"avg_ns\": 3761353711,\n \"stddev_ns\": 651002,\n \"avg_ts\": 34.030302,\n \"stddev_ts\": 0.005890,\n \"samples_ns\": [ 3761960292, 3760665909, 3761434932 ],\n \"samples_ts\": [ 34.0248, 34.0365, 34.0296 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:04:05Z", + "avg_ns": 4576227433, + "stddev_ns": 692770, + "avg_ts": 111.882553, + "stddev_ts": 0.016855, + "samples_ns": [ + 4576014496, + 4576998242, + 4575669562 + ], + "samples_ts": [ + 111.888, + 111.864, + 111.896 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T22:04:24Z", + "avg_ns": 3761353711, + "stddev_ns": 651002, + "avg_ts": 34.030302, + "stddev_ts": 0.00589, + "samples_ns": [ + 3761960292, + 3760665909, + 3761434932 + ], + "samples_ts": [ + 34.0248, + 34.0365, + 34.0296 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 162 + }, + { + "timestamp_utc": "2025-12-08T22:05:41.294043+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:04:36Z\",\n \"avg_ns\": 4557751611,\n \"stddev_ns\": 520683,\n \"avg_ts\": 112.336092,\n \"stddev_ts\": 0.012616,\n \"samples_ns\": [ 4558146321, 4557173273, 4557935241 ],\n \"samples_ts\": [ 112.326, 112.35, 112.332 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:04:54Z\",\n \"avg_ns\": 15610145377,\n \"stddev_ns\": 2133151,\n \"avg_ts\": 32.799182,\n \"stddev_ts\": 0.004467,\n \"samples_ns\": [ 15608124501, 15612362530, 15609949102 ],\n \"samples_ts\": [ 32.8034, 32.7945, 32.7996 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:04:36Z", + "avg_ns": 4557751611, + "stddev_ns": 520683, + "avg_ts": 112.336092, + "stddev_ts": 0.012616, + "samples_ns": [ + 4558146321, + 4557173273, + 4557935241 + ], + "samples_ts": [ + 112.326, + 112.35, + 112.332 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T22:04:54Z", + "avg_ns": 15610145377, + "stddev_ns": 2133151, + "avg_ts": 32.799182, + "stddev_ts": 0.004467, + "samples_ns": [ + 15608124501, + 15612362530, + 15609949102 + ], + "samples_ts": [ + 32.8034, + 32.7945, + 32.7996 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 163 + }, + { + "timestamp_utc": "2025-12-08T22:05:57.712334+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:05:41Z\",\n \"avg_ns\": 1086645871,\n \"stddev_ns\": 120829,\n \"avg_ts\": 117.793666,\n \"stddev_ts\": 0.012084,\n \"samples_ns\": [ 1086547404, 1086623310, 1086766901 ],\n \"samples_ts\": [ 117.804, 117.796, 117.781 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:05:46Z\",\n \"avg_ns\": 3756777430,\n \"stddev_ns\": 178112,\n \"avg_ts\": 34.071755,\n \"stddev_ts\": 0.001517,\n \"samples_ns\": [ 3756612903, 3756947247, 3756772141 ],\n \"samples_ts\": [ 34.0732, 34.0702, 34.0718 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:05:41Z", + "avg_ns": 1086645871, + "stddev_ns": 120829, + "avg_ts": 117.793666, + "stddev_ts": 0.012084, + "samples_ns": [ + 1086547404, + 1086623310, + 1086766901 + ], + "samples_ts": [ + 117.804, + 117.796, + 117.781 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T22:05:46Z", + "avg_ns": 3756777430, + "stddev_ns": 178112, + "avg_ts": 34.071755, + "stddev_ts": 0.001517, + "samples_ns": [ + 3756612903, + 3756947247, + 3756772141 + ], + "samples_ts": [ + 34.0732, + 34.0702, + 34.0718 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 164 + }, + { + "timestamp_utc": "2025-12-08T22:06:49.403820+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:05:58Z\",\n \"avg_ns\": 1085964870,\n \"stddev_ns\": 139601,\n \"avg_ts\": 117.867534,\n \"stddev_ts\": 0.015151,\n \"samples_ns\": [ 1086125158, 1085899542, 1085869910 ],\n \"samples_ts\": [ 117.85, 117.875, 117.878 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:06:02Z\",\n \"avg_ns\": 15521235944,\n \"stddev_ns\": 32806057,\n \"avg_ts\": 32.987162,\n \"stddev_ts\": 0.069638,\n \"samples_ns\": [ 15559061740, 15500554221, 15504091872 ],\n \"samples_ts\": [ 32.9069, 33.0311, 33.0235 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:05:58Z", + "avg_ns": 1085964870, + "stddev_ns": 139601, + "avg_ts": 117.867534, + "stddev_ts": 0.015151, + "samples_ns": [ + 1086125158, + 1085899542, + 1085869910 + ], + "samples_ts": [ + 117.85, + 117.875, + 117.878 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T22:06:02Z", + "avg_ns": 15521235944, + "stddev_ns": 32806057, + "avg_ts": 32.987162, + "stddev_ts": 0.069638, + "samples_ns": [ + 15559061740, + 15500554221, + 15504091872 + ], + "samples_ts": [ + 32.9069, + 33.0311, + 33.0235 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 165 + }, + { + "timestamp_utc": "2025-12-08T22:07:20.969587+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:06:50Z\",\n \"avg_ns\": 4846784729,\n \"stddev_ns\": 1007852,\n \"avg_ts\": 105.637045,\n \"stddev_ts\": 0.021964,\n \"samples_ns\": [ 4846032297, 4847929808, 4846392082 ],\n \"samples_ts\": [ 105.653, 105.612, 105.646 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:07:09Z\",\n \"avg_ns\": 3789288872,\n \"stddev_ns\": 1155087,\n \"avg_ts\": 33.779427,\n \"stddev_ts\": 0.010281,\n \"samples_ns\": [ 3790617875, 3788700218, 3788548524 ],\n \"samples_ts\": [ 33.7676, 33.7847, 33.786 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:06:50Z", + "avg_ns": 4846784729, + "stddev_ns": 1007852, + "avg_ts": 105.637045, + "stddev_ts": 0.021964, + "samples_ns": [ + 4846032297, + 4847929808, + 4846392082 + ], + "samples_ts": [ + 105.653, + 105.612, + 105.646 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T22:07:09Z", + "avg_ns": 3789288872, + "stddev_ns": 1155087, + "avg_ts": 33.779427, + "stddev_ts": 0.010281, + "samples_ns": [ + 3790617875, + 3788700218, + 3788548524 + ], + "samples_ts": [ + 33.7676, + 33.7847, + 33.786 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 166 + }, + { + "timestamp_utc": "2025-12-08T22:08:27.739893+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:07:21Z\",\n \"avg_ns\": 4861001084,\n \"stddev_ns\": 807554,\n \"avg_ts\": 105.328100,\n \"stddev_ts\": 0.017366,\n \"samples_ns\": [ 4860669141, 4861915242, 4860418871 ],\n \"samples_ts\": [ 105.335, 105.308, 105.341 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:07:41Z\",\n \"avg_ns\": 15510637476,\n \"stddev_ns\": 9880592,\n \"avg_ts\": 33.009613,\n \"stddev_ts\": 0.021022,\n \"samples_ns\": [ 15521768066, 15502902255, 15507242107 ],\n \"samples_ts\": [ 32.9859, 33.0261, 33.0168 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:07:21Z", + "avg_ns": 4861001084, + "stddev_ns": 807554, + "avg_ts": 105.3281, + "stddev_ts": 0.017366, + "samples_ns": [ + 4860669141, + 4861915242, + 4860418871 + ], + "samples_ts": [ + 105.335, + 105.308, + 105.341 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T22:07:41Z", + "avg_ns": 15510637476, + "stddev_ns": 9880592, + "avg_ts": 33.009613, + "stddev_ts": 0.021022, + "samples_ns": [ + 15521768066, + 15502902255, + 15507242107 + ], + "samples_ts": [ + 32.9859, + 33.0261, + 33.0168 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 167 + }, + { + "timestamp_utc": "2025-12-08T22:08:44.260930+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:08:28Z\",\n \"avg_ns\": 1086070906,\n \"stddev_ns\": 114432,\n \"avg_ts\": 117.856026,\n \"stddev_ts\": 0.011342,\n \"samples_ns\": [ 1086159730, 1085955745, 1086097245 ],\n \"samples_ts\": [ 117.846, 117.869, 117.853 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:08:32Z\",\n \"avg_ns\": 3782075844,\n \"stddev_ns\": 385052,\n \"avg_ts\": 33.843848,\n \"stddev_ts\": 0.003357,\n \"samples_ns\": [ 3782393499, 3782172013, 3781662022 ],\n \"samples_ts\": [ 33.841, 33.843, 33.8476 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:08:28Z", + "avg_ns": 1086070906, + "stddev_ns": 114432, + "avg_ts": 117.856026, + "stddev_ts": 0.011342, + "samples_ns": [ + 1086159730, + 1085955745, + 1086097245 + ], + "samples_ts": [ + 117.846, + 117.869, + 117.853 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T22:08:32Z", + "avg_ns": 3782075844, + "stddev_ns": 385052, + "avg_ts": 33.843848, + "stddev_ts": 0.003357, + "samples_ns": [ + 3782393499, + 3782172013, + 3781662022 + ], + "samples_ts": [ + 33.841, + 33.843, + 33.8476 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 168 + }, + { + "timestamp_utc": "2025-12-08T22:09:36.158039+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:08:44Z\",\n \"avg_ns\": 1085917938,\n \"stddev_ns\": 91951,\n \"avg_ts\": 117.872627,\n \"stddev_ts\": 0.008604,\n \"samples_ns\": [ 1085831421, 1085987069, 1085935326 ],\n \"samples_ts\": [ 117.882, 117.865, 117.871 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:08:49Z\",\n \"avg_ns\": 15589422494,\n \"stddev_ns\": 2408208,\n \"avg_ts\": 32.842782,\n \"stddev_ts\": 0.005059,\n \"samples_ns\": [ 15588546667, 15587581597, 15592139220 ],\n \"samples_ts\": [ 32.8446, 32.8467, 32.8371 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:08:44Z", + "avg_ns": 1085917938, + "stddev_ns": 91951, + "avg_ts": 117.872627, + "stddev_ts": 0.008604, + "samples_ns": [ + 1085831421, + 1085987069, + 1085935326 + ], + "samples_ts": [ + 117.882, + 117.865, + 117.871 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T22:08:49Z", + "avg_ns": 15589422494, + "stddev_ns": 2408208, + "avg_ts": 32.842782, + "stddev_ts": 0.005059, + "samples_ns": [ + 15588546667, + 15587581597, + 15592139220 + ], + "samples_ts": [ + 32.8446, + 32.8467, + 32.8371 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 169 + }, + { + "timestamp_utc": "2025-12-08T22:10:06.350985+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:09:36Z\",\n \"avg_ns\": 4541353810,\n \"stddev_ns\": 285971,\n \"avg_ts\": 112.741712,\n \"stddev_ts\": 0.006899,\n \"samples_ns\": [ 4541674514, 4541203463, 4541183454 ],\n \"samples_ts\": [ 112.734, 112.745, 112.746 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:09:55Z\",\n \"avg_ns\": 3745404282,\n \"stddev_ns\": 1245621,\n \"avg_ts\": 34.175218,\n \"stddev_ts\": 0.011354,\n \"samples_ns\": [ 3746313242, 3743986386, 3745913219 ],\n \"samples_ts\": [ 34.1669, 34.1882, 34.1706 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:09:36Z", + "avg_ns": 4541353810, + "stddev_ns": 285971, + "avg_ts": 112.741712, + "stddev_ts": 0.006899, + "samples_ns": [ + 4541674514, + 4541203463, + 4541183454 + ], + "samples_ts": [ + 112.734, + 112.745, + 112.746 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T22:09:55Z", + "avg_ns": 3745404282, + "stddev_ns": 1245621, + "avg_ts": 34.175218, + "stddev_ts": 0.011354, + "samples_ns": [ + 3746313242, + 3743986386, + 3745913219 + ], + "samples_ts": [ + 34.1669, + 34.1882, + 34.1706 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 170 + }, + { + "timestamp_utc": "2025-12-08T22:11:11.920690+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:10:07Z\",\n \"avg_ns\": 4548851732,\n \"stddev_ns\": 231579,\n \"avg_ts\": 112.555878,\n \"stddev_ts\": 0.005222,\n \"samples_ns\": [ 4548988681, 4548957801, 4548608716 ],\n \"samples_ts\": [ 112.552, 112.553, 112.562 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:10:25Z\",\n \"avg_ns\": 15526173884,\n \"stddev_ns\": 48233727,\n \"avg_ts\": 32.976784,\n \"stddev_ts\": 0.102265,\n \"samples_ns\": [ 15581800102, 15495955487, 15500766063 ],\n \"samples_ts\": [ 32.8588, 33.0409, 33.0306 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:10:07Z", + "avg_ns": 4548851732, + "stddev_ns": 231579, + "avg_ts": 112.555878, + "stddev_ts": 0.005222, + "samples_ns": [ + 4548988681, + 4548957801, + 4548608716 + ], + "samples_ts": [ + 112.552, + 112.553, + 112.562 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T22:10:25Z", + "avg_ns": 15526173884, + "stddev_ns": 48233727, + "avg_ts": 32.976784, + "stddev_ts": 0.102265, + "samples_ns": [ + 15581800102, + 15495955487, + 15500766063 + ], + "samples_ts": [ + 32.8588, + 33.0409, + 33.0306 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 171 + }, + { + "timestamp_utc": "2025-12-08T22:11:28.303294+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:11:12Z\",\n \"avg_ns\": 1087368413,\n \"stddev_ns\": 178342,\n \"avg_ts\": 117.715395,\n \"stddev_ts\": 0.018633,\n \"samples_ns\": [ 1087257574, 1087280944, 1087566723 ],\n \"samples_ts\": [ 117.727, 117.725, 117.694 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:11:16Z\",\n \"avg_ns\": 3740434743,\n \"stddev_ns\": 776680,\n \"avg_ts\": 34.220622,\n \"stddev_ts\": 0.007083,\n \"samples_ns\": [ 3741313539, 3739852935, 3740137756 ],\n \"samples_ts\": [ 34.2126, 34.2259, 34.2233 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:11:12Z", + "avg_ns": 1087368413, + "stddev_ns": 178342, + "avg_ts": 117.715395, + "stddev_ts": 0.018633, + "samples_ns": [ + 1087257574, + 1087280944, + 1087566723 + ], + "samples_ts": [ + 117.727, + 117.725, + 117.694 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T22:11:16Z", + "avg_ns": 3740434743, + "stddev_ns": 776680, + "avg_ts": 34.220622, + "stddev_ts": 0.007083, + "samples_ns": [ + 3741313539, + 3739852935, + 3740137756 + ], + "samples_ts": [ + 34.2126, + 34.2259, + 34.2233 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 172 + }, + { + "timestamp_utc": "2025-12-08T22:12:19.777660+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:11:28Z\",\n \"avg_ns\": 1085952577,\n \"stddev_ns\": 264847,\n \"avg_ts\": 117.868872,\n \"stddev_ts\": 0.028301,\n \"samples_ns\": [ 1086046709, 1086153154, 1085657870 ],\n \"samples_ts\": [ 117.859, 117.847, 117.901 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:11:33Z\",\n \"avg_ns\": 15446734086,\n \"stddev_ns\": 25782842,\n \"avg_ts\": 33.146227,\n \"stddev_ts\": 0.055273,\n \"samples_ns\": [ 15476505540, 15431829436, 15431867282 ],\n \"samples_ts\": [ 33.0824, 33.1782, 33.1781 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:11:28Z", + "avg_ns": 1085952577, + "stddev_ns": 264847, + "avg_ts": 117.868872, + "stddev_ts": 0.028301, + "samples_ns": [ + 1086046709, + 1086153154, + 1085657870 + ], + "samples_ts": [ + 117.859, + 117.847, + 117.901 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T22:11:33Z", + "avg_ns": 15446734086, + "stddev_ns": 25782842, + "avg_ts": 33.146227, + "stddev_ts": 0.055273, + "samples_ns": [ + 15476505540, + 15431829436, + 15431867282 + ], + "samples_ts": [ + 33.0824, + 33.1782, + 33.1781 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 173 + }, + { + "timestamp_utc": "2025-12-08T22:12:50.072846+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:12:20Z\",\n \"avg_ns\": 4560014372,\n \"stddev_ns\": 157894,\n \"avg_ts\": 112.280348,\n \"stddev_ts\": 0.003888,\n \"samples_ns\": [ 4559833205, 4560122688, 4560087223 ],\n \"samples_ts\": [ 112.285, 112.278, 112.279 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:12:38Z\",\n \"avg_ns\": 3747143112,\n \"stddev_ns\": 1212751,\n \"avg_ts\": 34.159360,\n \"stddev_ts\": 0.011039,\n \"samples_ns\": [ 3748540657, 3746490940, 3746397740 ],\n \"samples_ts\": [ 34.1466, 34.1653, 34.1662 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:12:20Z", + "avg_ns": 4560014372, + "stddev_ns": 157894, + "avg_ts": 112.280348, + "stddev_ts": 0.003888, + "samples_ns": [ + 4559833205, + 4560122688, + 4560087223 + ], + "samples_ts": [ + 112.285, + 112.278, + 112.279 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T22:12:38Z", + "avg_ns": 3747143112, + "stddev_ns": 1212751, + "avg_ts": 34.15936, + "stddev_ts": 0.011039, + "samples_ns": [ + 3748540657, + 3746490940, + 3746397740 + ], + "samples_ts": [ + 34.1466, + 34.1653, + 34.1662 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 174 + }, + { + "timestamp_utc": "2025-12-08T22:13:55.490183+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:12:50Z\",\n \"avg_ns\": 4556337712,\n \"stddev_ns\": 495038,\n \"avg_ts\": 112.370951,\n \"stddev_ts\": 0.012094,\n \"samples_ns\": [ 4555980259, 4556896804, 4556136074 ],\n \"samples_ts\": [ 112.38, 112.357, 112.376 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:13:08Z\",\n \"avg_ns\": 15467120258,\n \"stddev_ns\": 1526763,\n \"avg_ts\": 33.102478,\n \"stddev_ts\": 0.003246,\n \"samples_ns\": [ 15467877568, 15465374148, 15468109060 ],\n \"samples_ts\": [ 33.1009, 33.1062, 33.1004 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:12:50Z", + "avg_ns": 4556337712, + "stddev_ns": 495038, + "avg_ts": 112.370951, + "stddev_ts": 0.012094, + "samples_ns": [ + 4555980259, + 4556896804, + 4556136074 + ], + "samples_ts": [ + 112.38, + 112.357, + 112.376 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T22:13:08Z", + "avg_ns": 15467120258, + "stddev_ns": 1526763, + "avg_ts": 33.102478, + "stddev_ts": 0.003246, + "samples_ns": [ + 15467877568, + 15465374148, + 15468109060 + ], + "samples_ts": [ + 33.1009, + 33.1062, + 33.1004 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 175 + }, + { + "timestamp_utc": "2025-12-08T22:14:11.804888+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:13:56Z\",\n \"avg_ns\": 1086289378,\n \"stddev_ns\": 205821,\n \"avg_ts\": 117.832325,\n \"stddev_ts\": 0.022324,\n \"samples_ns\": [ 1086216909, 1086129593, 1086521632 ],\n \"samples_ts\": [ 117.84, 117.85, 117.807 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:14:00Z\",\n \"avg_ns\": 3722154045,\n \"stddev_ns\": 574037,\n \"avg_ts\": 34.388690,\n \"stddev_ts\": 0.005243,\n \"samples_ns\": [ 3722679457, 3721552176, 3722230504 ],\n \"samples_ts\": [ 34.3838, 34.3943, 34.388 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:13:56Z", + "avg_ns": 1086289378, + "stddev_ns": 205821, + "avg_ts": 117.832325, + "stddev_ts": 0.022324, + "samples_ns": [ + 1086216909, + 1086129593, + 1086521632 + ], + "samples_ts": [ + 117.84, + 117.85, + 117.807 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T22:14:00Z", + "avg_ns": 3722154045, + "stddev_ns": 574037, + "avg_ts": 34.38869, + "stddev_ts": 0.005243, + "samples_ns": [ + 3722679457, + 3721552176, + 3722230504 + ], + "samples_ts": [ + 34.3838, + 34.3943, + 34.388 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 176 + }, + { + "timestamp_utc": "2025-12-08T22:15:03.121800+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:14:12Z\",\n \"avg_ns\": 1086161961,\n \"stddev_ns\": 66462,\n \"avg_ts\": 117.846145,\n \"stddev_ts\": 0.007211,\n \"samples_ns\": [ 1086085764, 1086207986, 1086192133 ],\n \"samples_ts\": [ 117.854, 117.841, 117.843 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:14:16Z\",\n \"avg_ns\": 15392953161,\n \"stddev_ns\": 75449494,\n \"avg_ts\": 33.262505,\n \"stddev_ts\": 0.162583,\n \"samples_ns\": [ 15479966685, 15345694009, 15353198791 ],\n \"samples_ts\": [ 33.075, 33.3644, 33.3481 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:14:12Z", + "avg_ns": 1086161961, + "stddev_ns": 66462, + "avg_ts": 117.846145, + "stddev_ts": 0.007211, + "samples_ns": [ + 1086085764, + 1086207986, + 1086192133 + ], + "samples_ts": [ + 117.854, + 117.841, + 117.843 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T22:14:16Z", + "avg_ns": 15392953161, + "stddev_ns": 75449494, + "avg_ts": 33.262505, + "stddev_ts": 0.162583, + "samples_ns": [ + 15479966685, + 15345694009, + 15353198791 + ], + "samples_ts": [ + 33.075, + 33.3644, + 33.3481 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 177 + }, + { + "timestamp_utc": "2025-12-08T22:15:34.701156+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:15:03Z\",\n \"avg_ns\": 4854099567,\n \"stddev_ns\": 561864,\n \"avg_ts\": 105.477854,\n \"stddev_ts\": 0.012209,\n \"samples_ns\": [ 4854629033, 4853510117, 4854159551 ],\n \"samples_ts\": [ 105.466, 105.491, 105.477 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:15:23Z\",\n \"avg_ns\": 3790626411,\n \"stddev_ns\": 508255,\n \"avg_ts\": 33.767506,\n \"stddev_ts\": 0.004528,\n \"samples_ns\": [ 3791117024, 3790660028, 3790102181 ],\n \"samples_ts\": [ 33.7631, 33.7672, 33.7722 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:15:03Z", + "avg_ns": 4854099567, + "stddev_ns": 561864, + "avg_ts": 105.477854, + "stddev_ts": 0.012209, + "samples_ns": [ + 4854629033, + 4853510117, + 4854159551 + ], + "samples_ts": [ + 105.466, + 105.491, + 105.477 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T22:15:23Z", + "avg_ns": 3790626411, + "stddev_ns": 508255, + "avg_ts": 33.767506, + "stddev_ts": 0.004528, + "samples_ns": [ + 3791117024, + 3790660028, + 3790102181 + ], + "samples_ts": [ + 33.7631, + 33.7672, + 33.7722 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 178 + }, + { + "timestamp_utc": "2025-12-08T22:16:40.855672+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:15:35Z\",\n \"avg_ns\": 4832811590,\n \"stddev_ns\": 989437,\n \"avg_ts\": 105.942474,\n \"stddev_ts\": 0.021585,\n \"samples_ns\": [ 4831679232, 4833290342, 4833465198 ],\n \"samples_ts\": [ 105.967, 105.932, 105.928 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:15:54Z\",\n \"avg_ns\": 15342098538,\n \"stddev_ns\": 20081467,\n \"avg_ts\": 33.372265,\n \"stddev_ts\": 0.043648,\n \"samples_ns\": [ 15365285945, 15330414713, 15330594957 ],\n \"samples_ts\": [ 33.3219, 33.3977, 33.3973 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:15:35Z", + "avg_ns": 4832811590, + "stddev_ns": 989437, + "avg_ts": 105.942474, + "stddev_ts": 0.021585, + "samples_ns": [ + 4831679232, + 4833290342, + 4833465198 + ], + "samples_ts": [ + 105.967, + 105.932, + 105.928 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T22:15:54Z", + "avg_ns": 15342098538, + "stddev_ns": 20081467, + "avg_ts": 33.372265, + "stddev_ts": 0.043648, + "samples_ns": [ + 15365285945, + 15330414713, + 15330594957 + ], + "samples_ts": [ + 33.3219, + 33.3977, + 33.3973 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 179 + }, + { + "timestamp_utc": "2025-12-08T22:16:53.836842+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:16:41Z\",\n \"avg_ns\": 564252829,\n \"stddev_ns\": 582139,\n \"avg_ts\": 226.848824,\n \"stddev_ts\": 0.233789,\n \"samples_ns\": [ 564600580, 564576006, 563581903 ],\n \"samples_ts\": [ 226.709, 226.719, 227.119 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:16:43Z\",\n \"avg_ns\": 3313411789,\n \"stddev_ns\": 809508,\n \"avg_ts\": 38.630878,\n \"stddev_ts\": 0.009390,\n \"samples_ns\": [ 3314240279, 3312631647, 3313363443 ],\n \"samples_ts\": [ 38.6212, 38.64, 38.6314 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:16:41Z", + "avg_ns": 564252829, + "stddev_ns": 582139, + "avg_ts": 226.848824, + "stddev_ts": 0.233789, + "samples_ns": [ + 564600580, + 564576006, + 563581903 + ], + "samples_ts": [ + 226.709, + 226.719, + 227.119 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T22:16:43Z", + "avg_ns": 3313411789, + "stddev_ns": 809508, + "avg_ts": 38.630878, + "stddev_ts": 0.00939, + "samples_ns": [ + 3314240279, + 3312631647, + 3313363443 + ], + "samples_ts": [ + 38.6212, + 38.64, + 38.6314 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 180 + }, + { + "timestamp_utc": "2025-12-08T22:17:37.465537+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:16:54Z\",\n \"avg_ns\": 564243992,\n \"stddev_ns\": 561868,\n \"avg_ts\": 226.852366,\n \"stddev_ts\": 0.225823,\n \"samples_ns\": [ 564604447, 564530334, 563597196 ],\n \"samples_ts\": [ 226.707, 226.737, 227.113 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:16:56Z\",\n \"avg_ns\": 13528502932,\n \"stddev_ns\": 10138243,\n \"avg_ts\": 37.846035,\n \"stddev_ts\": 0.028350,\n \"samples_ns\": [ 13540209181, 13522567395, 13522732220 ],\n \"samples_ts\": [ 37.8133, 37.8626, 37.8622 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:16:54Z", + "avg_ns": 564243992, + "stddev_ns": 561868, + "avg_ts": 226.852366, + "stddev_ts": 0.225823, + "samples_ns": [ + 564604447, + 564530334, + 563597196 + ], + "samples_ts": [ + 226.707, + 226.737, + 227.113 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T22:16:56Z", + "avg_ns": 13528502932, + "stddev_ns": 10138243, + "avg_ts": 37.846035, + "stddev_ts": 0.02835, + "samples_ns": [ + 13540209181, + 13522567395, + 13522732220 + ], + "samples_ts": [ + 37.8133, + 37.8626, + 37.8622 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 181 + }, + { + "timestamp_utc": "2025-12-08T22:17:57.550597+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:17:38Z\",\n \"avg_ns\": 2341389406,\n \"stddev_ns\": 98318,\n \"avg_ts\": 218.673579,\n \"stddev_ts\": 0.009182,\n \"samples_ns\": [ 2341323026, 2341502357, 2341342835 ],\n \"samples_ts\": [ 218.68, 218.663, 218.678 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:17:47Z\",\n \"avg_ns\": 3310050233,\n \"stddev_ns\": 343173,\n \"avg_ts\": 38.670108,\n \"stddev_ts\": 0.003895,\n \"samples_ns\": [ 3309669711, 3310290989, 3310190001 ],\n \"samples_ts\": [ 38.6746, 38.6673, 38.6685 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:17:38Z", + "avg_ns": 2341389406, + "stddev_ns": 98318, + "avg_ts": 218.673579, + "stddev_ts": 0.009182, + "samples_ns": [ + 2341323026, + 2341502357, + 2341342835 + ], + "samples_ts": [ + 218.68, + 218.663, + 218.678 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T22:17:47Z", + "avg_ns": 3310050233, + "stddev_ns": 343173, + "avg_ts": 38.670108, + "stddev_ts": 0.003895, + "samples_ns": [ + 3309669711, + 3310290989, + 3310190001 + ], + "samples_ts": [ + 38.6746, + 38.6673, + 38.6685 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 182 + }, + { + "timestamp_utc": "2025-12-08T22:18:48.525707+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:17:58Z\",\n \"avg_ns\": 2357303589,\n \"stddev_ns\": 291635,\n \"avg_ts\": 217.197313,\n \"stddev_ts\": 0.026869,\n \"samples_ns\": [ 2357068664, 2357629999, 2357212104 ],\n \"samples_ts\": [ 217.219, 217.167, 217.206 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:18:07Z\",\n \"avg_ns\": 13585674742,\n \"stddev_ns\": 13065770,\n \"avg_ts\": 37.686779,\n \"stddev_ts\": 0.036230,\n \"samples_ns\": [ 13575081075, 13581668618, 13600274533 ],\n \"samples_ts\": [ 37.7162, 37.6979, 37.6463 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:17:58Z", + "avg_ns": 2357303589, + "stddev_ns": 291635, + "avg_ts": 217.197313, + "stddev_ts": 0.026869, + "samples_ns": [ + 2357068664, + 2357629999, + 2357212104 + ], + "samples_ts": [ + 217.219, + 217.167, + 217.206 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T22:18:07Z", + "avg_ns": 13585674742, + "stddev_ns": 13065770, + "avg_ts": 37.686779, + "stddev_ts": 0.03623, + "samples_ns": [ + 13575081075, + 13581668618, + 13600274533 + ], + "samples_ts": [ + 37.7162, + 37.6979, + 37.6463 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 183 + }, + { + "timestamp_utc": "2025-12-08T22:19:01.489164+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:18:49Z\",\n \"avg_ns\": 564215593,\n \"stddev_ns\": 133766,\n \"avg_ts\": 226.863642,\n \"stddev_ts\": 0.052067,\n \"samples_ns\": [ 564255668, 564070814, 564320299 ],\n \"samples_ts\": [ 226.848, 226.922, 226.822 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:18:51Z\",\n \"avg_ns\": 3306323718,\n \"stddev_ns\": 988528,\n \"avg_ts\": 38.713695,\n \"stddev_ts\": 0.011533,\n \"samples_ns\": [ 3305748552, 3305761324, 3307461280 ],\n \"samples_ts\": [ 38.7204, 38.7203, 38.7004 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:18:49Z", + "avg_ns": 564215593, + "stddev_ns": 133766, + "avg_ts": 226.863642, + "stddev_ts": 0.052067, + "samples_ns": [ + 564255668, + 564070814, + 564320299 + ], + "samples_ts": [ + 226.848, + 226.922, + 226.822 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T22:18:51Z", + "avg_ns": 3306323718, + "stddev_ns": 988528, + "avg_ts": 38.713695, + "stddev_ts": 0.011533, + "samples_ns": [ + 3305748552, + 3305761324, + 3307461280 + ], + "samples_ts": [ + 38.7204, + 38.7203, + 38.7004 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 184 + }, + { + "timestamp_utc": "2025-12-08T22:19:45.052054+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:19:02Z\",\n \"avg_ns\": 563701221,\n \"stddev_ns\": 125758,\n \"avg_ts\": 227.070652,\n \"stddev_ts\": 0.048813,\n \"samples_ns\": [ 563636937, 563625715, 563841013 ],\n \"samples_ts\": [ 227.097, 227.101, 227.014 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:19:04Z\",\n \"avg_ns\": 13505800222,\n \"stddev_ns\": 1516481,\n \"avg_ts\": 37.909639,\n \"stddev_ts\": 0.004231,\n \"samples_ns\": [ 13507517405, 13505188951, 13504694312 ],\n \"samples_ts\": [ 37.9048, 37.9114, 37.9127 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:19:02Z", + "avg_ns": 563701221, + "stddev_ns": 125758, + "avg_ts": 227.070652, + "stddev_ts": 0.048813, + "samples_ns": [ + 563636937, + 563625715, + 563841013 + ], + "samples_ts": [ + 227.097, + 227.101, + 227.014 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T22:19:04Z", + "avg_ns": 13505800222, + "stddev_ns": 1516481, + "avg_ts": 37.909639, + "stddev_ts": 0.004231, + "samples_ns": [ + 13507517405, + 13505188951, + 13504694312 + ], + "samples_ts": [ + 37.9048, + 37.9114, + 37.9127 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 185 + }, + { + "timestamp_utc": "2025-12-08T22:20:05.538723+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:19:45Z\",\n \"avg_ns\": 2398911080,\n \"stddev_ns\": 219770,\n \"avg_ts\": 213.430171,\n \"stddev_ts\": 0.018555,\n \"samples_ns\": [ 2399151468, 2398778166, 2398803608 ],\n \"samples_ts\": [ 213.409, 213.442, 213.44 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:19:55Z\",\n \"avg_ns\": 3349885592,\n \"stddev_ns\": 829077,\n \"avg_ts\": 38.210262,\n \"stddev_ts\": 0.009457,\n \"samples_ns\": [ 3349011261, 3350660439, 3349985076 ],\n \"samples_ts\": [ 38.2202, 38.2014, 38.2091 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:19:45Z", + "avg_ns": 2398911080, + "stddev_ns": 219770, + "avg_ts": 213.430171, + "stddev_ts": 0.018555, + "samples_ns": [ + 2399151468, + 2398778166, + 2398803608 + ], + "samples_ts": [ + 213.409, + 213.442, + 213.44 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T22:19:55Z", + "avg_ns": 3349885592, + "stddev_ns": 829077, + "avg_ts": 38.210262, + "stddev_ts": 0.009457, + "samples_ns": [ + 3349011261, + 3350660439, + 3349985076 + ], + "samples_ts": [ + 38.2202, + 38.2014, + 38.2091 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 186 + }, + { + "timestamp_utc": "2025-12-08T22:20:56.360760+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:20:06Z\",\n \"avg_ns\": 2370335503,\n \"stddev_ns\": 61553,\n \"avg_ts\": 216.003177,\n \"stddev_ts\": 0.003432,\n \"samples_ns\": [ 2370292110, 2370359701, 2370354699 ],\n \"samples_ts\": [ 216.007, 216.001, 216.001 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:20:15Z\",\n \"avg_ns\": 13516110299,\n \"stddev_ns\": 5552668,\n \"avg_ts\": 37.880725,\n \"stddev_ts\": 0.015559,\n \"samples_ns\": [ 13510521269, 13521623392, 13516186237 ],\n \"samples_ts\": [ 37.8964, 37.8653, 37.8805 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:20:06Z", + "avg_ns": 2370335503, + "stddev_ns": 61553, + "avg_ts": 216.003177, + "stddev_ts": 0.003432, + "samples_ns": [ + 2370292110, + 2370359701, + 2370354699 + ], + "samples_ts": [ + 216.007, + 216.001, + 216.001 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T22:20:15Z", + "avg_ns": 13516110299, + "stddev_ns": 5552668, + "avg_ts": 37.880725, + "stddev_ts": 0.015559, + "samples_ns": [ + 13510521269, + 13521623392, + 13516186237 + ], + "samples_ts": [ + 37.8964, + 37.8653, + 37.8805 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 187 + }, + { + "timestamp_utc": "2025-12-08T22:21:09.374112+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:20:57Z\",\n \"avg_ns\": 563849381,\n \"stddev_ns\": 36401,\n \"avg_ts\": 227.010979,\n \"stddev_ts\": 0.011108,\n \"samples_ns\": [ 563817764, 563868579, 563861801 ],\n \"samples_ts\": [ 227.024, 227.003, 227.006 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:20:59Z\",\n \"avg_ns\": 3323769778,\n \"stddev_ns\": 1083681,\n \"avg_ts\": 38.510492,\n \"stddev_ts\": 0.012520,\n \"samples_ns\": [ 3324878903, 3322720140, 3323710293 ],\n \"samples_ts\": [ 38.4976, 38.5227, 38.5112 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:20:57Z", + "avg_ns": 563849381, + "stddev_ns": 36401, + "avg_ts": 227.010979, + "stddev_ts": 0.011108, + "samples_ns": [ + 563817764, + 563868579, + 563861801 + ], + "samples_ts": [ + 227.024, + 227.003, + 227.006 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T22:20:59Z", + "avg_ns": 3323769778, + "stddev_ns": 1083681, + "avg_ts": 38.510492, + "stddev_ts": 0.01252, + "samples_ns": [ + 3324878903, + 3322720140, + 3323710293 + ], + "samples_ts": [ + 38.4976, + 38.5227, + 38.5112 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 188 + }, + { + "timestamp_utc": "2025-12-08T22:21:53.087414+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:21:10Z\",\n \"avg_ns\": 563547166,\n \"stddev_ns\": 18763,\n \"avg_ts\": 227.132719,\n \"stddev_ts\": 0.007562,\n \"samples_ns\": [ 563530999, 563567741, 563542758 ],\n \"samples_ts\": [ 227.139, 227.124, 227.134 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:21:12Z\",\n \"avg_ns\": 13557106994,\n \"stddev_ns\": 4292674,\n \"avg_ts\": 37.766172,\n \"stddev_ts\": 0.011953,\n \"samples_ns\": [ 13561692932, 13556438951, 13553189100 ],\n \"samples_ts\": [ 37.7534, 37.768, 37.7771 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:21:10Z", + "avg_ns": 563547166, + "stddev_ns": 18763, + "avg_ts": 227.132719, + "stddev_ts": 0.007562, + "samples_ns": [ + 563530999, + 563567741, + 563542758 + ], + "samples_ts": [ + 227.139, + 227.124, + 227.134 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T22:21:12Z", + "avg_ns": 13557106994, + "stddev_ns": 4292674, + "avg_ts": 37.766172, + "stddev_ts": 0.011953, + "samples_ns": [ + 13561692932, + 13556438951, + 13553189100 + ], + "samples_ts": [ + 37.7534, + 37.768, + 37.7771 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 189 + }, + { + "timestamp_utc": "2025-12-08T22:22:14.045758+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:21:53Z\",\n \"avg_ns\": 2551444093,\n \"stddev_ns\": 253742,\n \"avg_ts\": 200.670673,\n \"stddev_ts\": 0.019556,\n \"samples_ns\": [ 2551272569, 2551729275, 2551330436 ],\n \"samples_ts\": [ 200.684, 200.648, 200.68 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:22:03Z\",\n \"avg_ns\": 3319742872,\n \"stddev_ns\": 366795,\n \"avg_ts\": 38.557203,\n \"stddev_ts\": 0.004207,\n \"samples_ns\": [ 3319826691, 3320055857, 3319346069 ],\n \"samples_ts\": [ 38.5562, 38.5536, 38.5618 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:21:53Z", + "avg_ns": 2551444093, + "stddev_ns": 253742, + "avg_ts": 200.670673, + "stddev_ts": 0.019556, + "samples_ns": [ + 2551272569, + 2551729275, + 2551330436 + ], + "samples_ts": [ + 200.684, + 200.648, + 200.68 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T22:22:03Z", + "avg_ns": 3319742872, + "stddev_ns": 366795, + "avg_ts": 38.557203, + "stddev_ts": 0.004207, + "samples_ns": [ + 3319826691, + 3320055857, + 3319346069 + ], + "samples_ts": [ + 38.5562, + 38.5536, + 38.5618 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 190 + }, + { + "timestamp_utc": "2025-12-08T22:23:05.671418+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:22:14Z\",\n \"avg_ns\": 2543950835,\n \"stddev_ns\": 407965,\n \"avg_ts\": 201.261754,\n \"stddev_ts\": 0.031778,\n \"samples_ns\": [ 2544357860, 2543939928, 2543554719 ],\n \"samples_ts\": [ 201.23, 201.263, 201.293 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:22:24Z\",\n \"avg_ns\": 13551267684,\n \"stddev_ns\": 7336103,\n \"avg_ts\": 37.782450,\n \"stddev_ts\": 0.020456,\n \"samples_ns\": [ 13543497168, 13552231859, 13558074025 ],\n \"samples_ts\": [ 37.8041, 37.7798, 37.7635 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:22:14Z", + "avg_ns": 2543950835, + "stddev_ns": 407965, + "avg_ts": 201.261754, + "stddev_ts": 0.031778, + "samples_ns": [ + 2544357860, + 2543939928, + 2543554719 + ], + "samples_ts": [ + 201.23, + 201.263, + 201.293 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T22:22:24Z", + "avg_ns": 13551267684, + "stddev_ns": 7336103, + "avg_ts": 37.78245, + "stddev_ts": 0.020456, + "samples_ns": [ + 13543497168, + 13552231859, + 13558074025 + ], + "samples_ts": [ + 37.8041, + 37.7798, + 37.7635 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 191 + }, + { + "timestamp_utc": "2025-12-08T22:23:18.664948+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:23:06Z\",\n \"avg_ns\": 563129753,\n \"stddev_ns\": 105877,\n \"avg_ts\": 227.301084,\n \"stddev_ts\": 0.042734,\n \"samples_ns\": [ 563243088, 563033381, 563112790 ],\n \"samples_ts\": [ 227.255, 227.34, 227.308 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:23:08Z\",\n \"avg_ns\": 3317865230,\n \"stddev_ns\": 663595,\n \"avg_ts\": 38.579024,\n \"stddev_ts\": 0.007716,\n \"samples_ns\": [ 3318535523, 3317851625, 3317208542 ],\n \"samples_ts\": [ 38.5712, 38.5792, 38.5867 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:23:06Z", + "avg_ns": 563129753, + "stddev_ns": 105877, + "avg_ts": 227.301084, + "stddev_ts": 0.042734, + "samples_ns": [ + 563243088, + 563033381, + 563112790 + ], + "samples_ts": [ + 227.255, + 227.34, + 227.308 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T22:23:08Z", + "avg_ns": 3317865230, + "stddev_ns": 663595, + "avg_ts": 38.579024, + "stddev_ts": 0.007716, + "samples_ns": [ + 3318535523, + 3317851625, + 3317208542 + ], + "samples_ts": [ + 38.5712, + 38.5792, + 38.5867 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 192 + }, + { + "timestamp_utc": "2025-12-08T22:24:02.450465+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:23:19Z\",\n \"avg_ns\": 563841781,\n \"stddev_ns\": 90466,\n \"avg_ts\": 227.014042,\n \"stddev_ts\": 0.033819,\n \"samples_ns\": [ 563934869, 563818849, 563771627 ],\n \"samples_ts\": [ 226.977, 227.023, 227.042 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:23:21Z\",\n \"avg_ns\": 13552453151,\n \"stddev_ns\": 3685866,\n \"avg_ts\": 37.779140,\n \"stddev_ts\": 0.010266,\n \"samples_ns\": [ 13548241786, 13555065595, 13554052074 ],\n \"samples_ts\": [ 37.7909, 37.7719, 37.7747 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:23:19Z", + "avg_ns": 563841781, + "stddev_ns": 90466, + "avg_ts": 227.014042, + "stddev_ts": 0.033819, + "samples_ns": [ + 563934869, + 563818849, + 563771627 + ], + "samples_ts": [ + 226.977, + 227.023, + 227.042 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T22:23:21Z", + "avg_ns": 13552453151, + "stddev_ns": 3685866, + "avg_ts": 37.77914, + "stddev_ts": 0.010266, + "samples_ns": [ + 13548241786, + 13555065595, + 13554052074 + ], + "samples_ts": [ + 37.7909, + 37.7719, + 37.7747 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 193 + }, + { + "timestamp_utc": "2025-12-08T22:24:22.616180+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:24:03Z\",\n \"avg_ns\": 2343262684,\n \"stddev_ns\": 838424,\n \"avg_ts\": 218.498783,\n \"stddev_ts\": 0.078164,\n \"samples_ns\": [ 2342852886, 2342707975, 2344227191 ],\n \"samples_ts\": [ 218.537, 218.551, 218.409 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:24:12Z\",\n \"avg_ns\": 3322675711,\n \"stddev_ns\": 1221938,\n \"avg_ts\": 38.523173,\n \"stddev_ts\": 0.014139,\n \"samples_ns\": [ 3323374511, 3323384730, 3321267894 ],\n \"samples_ts\": [ 38.5151, 38.515, 38.5395 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:24:03Z", + "avg_ns": 2343262684, + "stddev_ns": 838424, + "avg_ts": 218.498783, + "stddev_ts": 0.078164, + "samples_ns": [ + 2342852886, + 2342707975, + 2344227191 + ], + "samples_ts": [ + 218.537, + 218.551, + 218.409 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T22:24:12Z", + "avg_ns": 3322675711, + "stddev_ns": 1221938, + "avg_ts": 38.523173, + "stddev_ts": 0.014139, + "samples_ns": [ + 3323374511, + 3323384730, + 3321267894 + ], + "samples_ts": [ + 38.5151, + 38.515, + 38.5395 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 194 + }, + { + "timestamp_utc": "2025-12-08T22:25:13.916206+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:24:23Z\",\n \"avg_ns\": 2358878369,\n \"stddev_ns\": 451374,\n \"avg_ts\": 217.052316,\n \"stddev_ts\": 0.041530,\n \"samples_ns\": [ 2359381949, 2358742960, 2358510198 ],\n \"samples_ts\": [ 217.006, 217.065, 217.086 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:24:32Z\",\n \"avg_ns\": 13678625985,\n \"stddev_ns\": 7863490,\n \"avg_ts\": 37.430668,\n \"stddev_ts\": 0.021512,\n \"samples_ns\": [ 13687472151, 13675975893, 13672429911 ],\n \"samples_ts\": [ 37.4065, 37.4379, 37.4476 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:24:23Z", + "avg_ns": 2358878369, + "stddev_ns": 451374, + "avg_ts": 217.052316, + "stddev_ts": 0.04153, + "samples_ns": [ + 2359381949, + 2358742960, + 2358510198 + ], + "samples_ts": [ + 217.006, + 217.065, + 217.086 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T22:24:32Z", + "avg_ns": 13678625985, + "stddev_ns": 7863490, + "avg_ts": 37.430668, + "stddev_ts": 0.021512, + "samples_ns": [ + 13687472151, + 13675975893, + 13672429911 + ], + "samples_ts": [ + 37.4065, + 37.4379, + 37.4476 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 195 + }, + { + "timestamp_utc": "2025-12-08T22:25:26.968213+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:25:14Z\",\n \"avg_ns\": 564539340,\n \"stddev_ns\": 198447,\n \"avg_ts\": 226.733553,\n \"stddev_ts\": 0.079126,\n \"samples_ns\": [ 564348413, 564527673, 564741935 ],\n \"samples_ts\": [ 226.81, 226.738, 226.652 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:25:16Z\",\n \"avg_ns\": 3323240266,\n \"stddev_ns\": 560560,\n \"avg_ts\": 38.516626,\n \"stddev_ts\": 0.006497,\n \"samples_ns\": [ 3323340627, 3322636304, 3323743867 ],\n \"samples_ts\": [ 38.5155, 38.5236, 38.5108 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:25:14Z", + "avg_ns": 564539340, + "stddev_ns": 198447, + "avg_ts": 226.733553, + "stddev_ts": 0.079126, + "samples_ns": [ + 564348413, + 564527673, + 564741935 + ], + "samples_ts": [ + 226.81, + 226.738, + 226.652 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T22:25:16Z", + "avg_ns": 3323240266, + "stddev_ns": 560560, + "avg_ts": 38.516626, + "stddev_ts": 0.006497, + "samples_ns": [ + 3323340627, + 3322636304, + 3323743867 + ], + "samples_ts": [ + 38.5155, + 38.5236, + 38.5108 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 196 + }, + { + "timestamp_utc": "2025-12-08T22:26:10.649997+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:25:27Z\",\n \"avg_ns\": 563783942,\n \"stddev_ns\": 104921,\n \"avg_ts\": 227.037333,\n \"stddev_ts\": 0.041154,\n \"samples_ns\": [ 563761998, 563895332, 563694497 ],\n \"samples_ts\": [ 227.046, 226.992, 227.073 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:25:29Z\",\n \"avg_ns\": 13532061980,\n \"stddev_ns\": 1801185,\n \"avg_ts\": 37.836067,\n \"stddev_ts\": 0.005036,\n \"samples_ns\": [ 13530499022, 13531655124, 13534031794 ],\n \"samples_ts\": [ 37.8404, 37.8372, 37.8306 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:25:27Z", + "avg_ns": 563783942, + "stddev_ns": 104921, + "avg_ts": 227.037333, + "stddev_ts": 0.041154, + "samples_ns": [ + 563761998, + 563895332, + 563694497 + ], + "samples_ts": [ + 227.046, + 226.992, + 227.073 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T22:25:29Z", + "avg_ns": 13532061980, + "stddev_ns": 1801185, + "avg_ts": 37.836067, + "stddev_ts": 0.005036, + "samples_ns": [ + 13530499022, + 13531655124, + 13534031794 + ], + "samples_ts": [ + 37.8404, + 37.8372, + 37.8306 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 197 + }, + { + "timestamp_utc": "2025-12-08T22:26:31.052803+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:26:11Z\",\n \"avg_ns\": 2402746245,\n \"stddev_ns\": 347714,\n \"avg_ts\": 213.089504,\n \"stddev_ts\": 0.030216,\n \"samples_ns\": [ 2402613079, 2403133453, 2402492205 ],\n \"samples_ts\": [ 213.101, 213.055, 213.112 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:26:20Z\",\n \"avg_ns\": 3319432286,\n \"stddev_ns\": 1122756,\n \"avg_ts\": 38.560814,\n \"stddev_ts\": 0.013043,\n \"samples_ns\": [ 3320556613, 3319429138, 3318311107 ],\n \"samples_ts\": [ 38.5478, 38.5608, 38.5738 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:26:11Z", + "avg_ns": 2402746245, + "stddev_ns": 347714, + "avg_ts": 213.089504, + "stddev_ts": 0.030216, + "samples_ns": [ + 2402613079, + 2403133453, + 2402492205 + ], + "samples_ts": [ + 213.101, + 213.055, + 213.112 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T22:26:20Z", + "avg_ns": 3319432286, + "stddev_ns": 1122756, + "avg_ts": 38.560814, + "stddev_ts": 0.013043, + "samples_ns": [ + 3320556613, + 3319429138, + 3318311107 + ], + "samples_ts": [ + 38.5478, + 38.5608, + 38.5738 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 198 + }, + { + "timestamp_utc": "2025-12-08T22:27:22.022339+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:26:31Z\",\n \"avg_ns\": 2368973475,\n \"stddev_ns\": 162721,\n \"avg_ts\": 216.127368,\n \"stddev_ts\": 0.013452,\n \"samples_ns\": [ 2368934581, 2368849376, 2369136470 ],\n \"samples_ts\": [ 216.131, 216.139, 216.112 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:26:41Z\",\n \"avg_ns\": 13547462630,\n \"stddev_ns\": 10347462,\n \"avg_ts\": 37.793070,\n \"stddev_ts\": 0.028852,\n \"samples_ns\": [ 13559138849, 13543813659, 13539435384 ],\n \"samples_ts\": [ 37.7605, 37.8032, 37.8155 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:26:31Z", + "avg_ns": 2368973475, + "stddev_ns": 162721, + "avg_ts": 216.127368, + "stddev_ts": 0.013452, + "samples_ns": [ + 2368934581, + 2368849376, + 2369136470 + ], + "samples_ts": [ + 216.131, + 216.139, + 216.112 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T22:26:41Z", + "avg_ns": 13547462630, + "stddev_ns": 10347462, + "avg_ts": 37.79307, + "stddev_ts": 0.028852, + "samples_ns": [ + 13559138849, + 13543813659, + 13539435384 + ], + "samples_ts": [ + 37.7605, + 37.8032, + 37.8155 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 199 + }, + { + "timestamp_utc": "2025-12-08T22:27:35.085150+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:27:22Z\",\n \"avg_ns\": 564249094,\n \"stddev_ns\": 138369,\n \"avg_ts\": 226.850173,\n \"stddev_ts\": 0.053971,\n \"samples_ns\": [ 564293750, 564355305, 564098229 ],\n \"samples_ts\": [ 226.832, 226.807, 226.911 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:27:25Z\",\n \"avg_ns\": 3326073054,\n \"stddev_ns\": 1327832,\n \"avg_ts\": 38.483825,\n \"stddev_ts\": 0.015332,\n \"samples_ns\": [ 3327568860, 3325045132, 3325605172 ],\n \"samples_ts\": [ 38.4665, 38.4957, 38.4892 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:27:22Z", + "avg_ns": 564249094, + "stddev_ns": 138369, + "avg_ts": 226.850173, + "stddev_ts": 0.053971, + "samples_ns": [ + 564293750, + 564355305, + 564098229 + ], + "samples_ts": [ + 226.832, + 226.807, + 226.911 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T22:27:25Z", + "avg_ns": 3326073054, + "stddev_ns": 1327832, + "avg_ts": 38.483825, + "stddev_ts": 0.015332, + "samples_ns": [ + 3327568860, + 3325045132, + 3325605172 + ], + "samples_ts": [ + 38.4665, + 38.4957, + 38.4892 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 200 + }, + { + "timestamp_utc": "2025-12-08T22:28:18.885851+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:27:35Z\",\n \"avg_ns\": 563611347,\n \"stddev_ns\": 84070,\n \"avg_ts\": 227.106858,\n \"stddev_ts\": 0.033875,\n \"samples_ns\": [ 563535853, 563701946, 563596242 ],\n \"samples_ts\": [ 227.137, 227.07, 227.113 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:27:38Z\",\n \"avg_ns\": 13573259756,\n \"stddev_ns\": 2246318,\n \"avg_ts\": 37.721227,\n \"stddev_ts\": 0.006226,\n \"samples_ns\": [ 13575233619, 13570824827, 13573720824 ],\n \"samples_ts\": [ 37.7157, 37.728, 37.7199 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:27:35Z", + "avg_ns": 563611347, + "stddev_ns": 84070, + "avg_ts": 227.106858, + "stddev_ts": 0.033875, + "samples_ns": [ + 563535853, + 563701946, + 563596242 + ], + "samples_ts": [ + 227.137, + 227.07, + 227.113 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T22:27:38Z", + "avg_ns": 13573259756, + "stddev_ns": 2246318, + "avg_ts": 37.721227, + "stddev_ts": 0.006226, + "samples_ns": [ + 13575233619, + 13570824827, + 13573720824 + ], + "samples_ts": [ + 37.7157, + 37.728, + 37.7199 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 201 + }, + { + "timestamp_utc": "2025-12-08T22:28:40.062128+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:28:19Z\",\n \"avg_ns\": 2601409603,\n \"stddev_ns\": 663425,\n \"avg_ts\": 196.816380,\n \"stddev_ts\": 0.050050,\n \"samples_ns\": [ 2600664974, 2601929152, 2601634684 ],\n \"samples_ts\": [ 196.873, 196.777, 196.799 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:28:30Z\",\n \"avg_ns\": 3313554524,\n \"stddev_ns\": 687786,\n \"avg_ts\": 38.629213,\n \"stddev_ts\": 0.007962,\n \"samples_ns\": [ 3313553403, 3314238037, 3312872134 ],\n \"samples_ts\": [ 38.6292, 38.6212, 38.6372 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:28:19Z", + "avg_ns": 2601409603, + "stddev_ns": 663425, + "avg_ts": 196.81638, + "stddev_ts": 0.05005, + "samples_ns": [ + 2600664974, + 2601929152, + 2601634684 + ], + "samples_ts": [ + 196.873, + 196.777, + 196.799 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T22:28:30Z", + "avg_ns": 3313554524, + "stddev_ns": 687786, + "avg_ts": 38.629213, + "stddev_ts": 0.007962, + "samples_ns": [ + 3313553403, + 3314238037, + 3312872134 + ], + "samples_ts": [ + 38.6292, + 38.6212, + 38.6372 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 202 + }, + { + "timestamp_utc": "2025-12-08T22:29:31.941846+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:28:40Z\",\n \"avg_ns\": 2603342655,\n \"stddev_ns\": 1051571,\n \"avg_ts\": 196.670252,\n \"stddev_ts\": 0.079338,\n \"samples_ns\": [ 2604487537, 2602423610, 2603116819 ],\n \"samples_ts\": [ 196.584, 196.74, 196.687 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:28:51Z\",\n \"avg_ns\": 13534911856,\n \"stddev_ns\": 3585314,\n \"avg_ts\": 37.828102,\n \"stddev_ts\": 0.010020,\n \"samples_ns\": [ 13538473080, 13534959560, 13531302928 ],\n \"samples_ts\": [ 37.8181, 37.828, 37.8382 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:28:40Z", + "avg_ns": 2603342655, + "stddev_ns": 1051571, + "avg_ts": 196.670252, + "stddev_ts": 0.079338, + "samples_ns": [ + 2604487537, + 2602423610, + 2603116819 + ], + "samples_ts": [ + 196.584, + 196.74, + 196.687 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T22:28:51Z", + "avg_ns": 13534911856, + "stddev_ns": 3585314, + "avg_ts": 37.828102, + "stddev_ts": 0.01002, + "samples_ns": [ + 13538473080, + 13534959560, + 13531302928 + ], + "samples_ts": [ + 37.8181, + 37.828, + 37.8382 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 203 + }, + { + "timestamp_utc": "2025-12-08T22:29:44.947285+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:29:32Z\",\n \"avg_ns\": 563743968,\n \"stddev_ns\": 180906,\n \"avg_ts\": 227.053443,\n \"stddev_ts\": 0.072862,\n \"samples_ns\": [ 563745863, 563923920, 563562121 ],\n \"samples_ts\": [ 227.053, 226.981, 227.127 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:29:34Z\",\n \"avg_ns\": 3307000837,\n \"stddev_ns\": 410842,\n \"avg_ts\": 38.705766,\n \"stddev_ts\": 0.004762,\n \"samples_ns\": [ 3307307205, 3307156018, 3306539289 ],\n \"samples_ts\": [ 38.7022, 38.7039, 38.7112 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:29:32Z", + "avg_ns": 563743968, + "stddev_ns": 180906, + "avg_ts": 227.053443, + "stddev_ts": 0.072862, + "samples_ns": [ + 563745863, + 563923920, + 563562121 + ], + "samples_ts": [ + 227.053, + 226.981, + 227.127 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T22:29:34Z", + "avg_ns": 3307000837, + "stddev_ns": 410842, + "avg_ts": 38.705766, + "stddev_ts": 0.004762, + "samples_ns": [ + 3307307205, + 3307156018, + 3306539289 + ], + "samples_ts": [ + 38.7022, + 38.7039, + 38.7112 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 204 + }, + { + "timestamp_utc": "2025-12-08T22:30:28.492729+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:29:45Z\",\n \"avg_ns\": 563479699,\n \"stddev_ns\": 120352,\n \"avg_ts\": 227.159920,\n \"stddev_ts\": 0.046588,\n \"samples_ns\": [ 563411156, 563613139, 563414804 ],\n \"samples_ts\": [ 227.188, 227.106, 227.186 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:29:47Z\",\n \"avg_ns\": 13488554276,\n \"stddev_ns\": 20803683,\n \"avg_ts\": 37.958168,\n \"stddev_ts\": 0.058515,\n \"samples_ns\": [ 13470291049, 13484172669, 13511199112 ],\n \"samples_ts\": [ 38.0096, 37.9704, 37.8945 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:29:45Z", + "avg_ns": 563479699, + "stddev_ns": 120352, + "avg_ts": 227.15992, + "stddev_ts": 0.046588, + "samples_ns": [ + 563411156, + 563613139, + 563414804 + ], + "samples_ts": [ + 227.188, + 227.106, + 227.186 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T22:29:47Z", + "avg_ns": 13488554276, + "stddev_ns": 20803683, + "avg_ts": 37.958168, + "stddev_ts": 0.058515, + "samples_ns": [ + 13470291049, + 13484172669, + 13511199112 + ], + "samples_ts": [ + 38.0096, + 37.9704, + 37.8945 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 205 + }, + { + "timestamp_utc": "2025-12-08T22:30:48.645681+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:30:29Z\",\n \"avg_ns\": 2346763037,\n \"stddev_ns\": 254182,\n \"avg_ts\": 218.172860,\n \"stddev_ts\": 0.023629,\n \"samples_ns\": [ 2346582981, 2347053798, 2346652332 ],\n \"samples_ts\": [ 218.19, 218.146, 218.183 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:30:38Z\",\n \"avg_ns\": 3312684911,\n \"stddev_ns\": 1748295,\n \"avg_ts\": 38.639360,\n \"stddev_ts\": 0.020376,\n \"samples_ns\": [ 3312086537, 3314652836, 3311315361 ],\n \"samples_ts\": [ 38.6463, 38.6164, 38.6553 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:30:29Z", + "avg_ns": 2346763037, + "stddev_ns": 254182, + "avg_ts": 218.17286, + "stddev_ts": 0.023629, + "samples_ns": [ + 2346582981, + 2347053798, + 2346652332 + ], + "samples_ts": [ + 218.19, + 218.146, + 218.183 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T22:30:38Z", + "avg_ns": 3312684911, + "stddev_ns": 1748295, + "avg_ts": 38.63936, + "stddev_ts": 0.020376, + "samples_ns": [ + 3312086537, + 3314652836, + 3311315361 + ], + "samples_ts": [ + 38.6463, + 38.6164, + 38.6553 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 206 + }, + { + "timestamp_utc": "2025-12-08T22:31:39.428000+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:30:49Z\",\n \"avg_ns\": 2366892996,\n \"stddev_ns\": 225217,\n \"avg_ts\": 216.317343,\n \"stddev_ts\": 0.020584,\n \"samples_ns\": [ 2366942361, 2366647191, 2367089436 ],\n \"samples_ts\": [ 216.313, 216.34, 216.299 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:30:58Z\",\n \"avg_ns\": 13495526351,\n \"stddev_ns\": 6264885,\n \"avg_ts\": 37.938503,\n \"stddev_ts\": 0.017609,\n \"samples_ns\": [ 13500626837, 13497415770, 13488536448 ],\n \"samples_ts\": [ 37.9242, 37.9332, 37.9582 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:30:49Z", + "avg_ns": 2366892996, + "stddev_ns": 225217, + "avg_ts": 216.317343, + "stddev_ts": 0.020584, + "samples_ns": [ + 2366942361, + 2366647191, + 2367089436 + ], + "samples_ts": [ + 216.313, + 216.34, + 216.299 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T22:30:58Z", + "avg_ns": 13495526351, + "stddev_ns": 6264885, + "avg_ts": 37.938503, + "stddev_ts": 0.017609, + "samples_ns": [ + 13500626837, + 13497415770, + 13488536448 + ], + "samples_ts": [ + 37.9242, + 37.9332, + 37.9582 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 207 + }, + { + "timestamp_utc": "2025-12-08T22:31:52.437408+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:31:40Z\",\n \"avg_ns\": 563656600,\n \"stddev_ns\": 28592,\n \"avg_ts\": 227.088621,\n \"stddev_ts\": 0.006419,\n \"samples_ns\": [ 563657131, 563672261, 563640409 ],\n \"samples_ts\": [ 227.088, 227.082, 227.095 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:31:42Z\",\n \"avg_ns\": 3310183779,\n \"stddev_ns\": 609315,\n \"avg_ts\": 38.668549,\n \"stddev_ts\": 0.007086,\n \"samples_ns\": [ 3309517898, 3310328552, 3310704888 ],\n \"samples_ts\": [ 38.6763, 38.6669, 38.6625 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:31:40Z", + "avg_ns": 563656600, + "stddev_ns": 28592, + "avg_ts": 227.088621, + "stddev_ts": 0.006419, + "samples_ns": [ + 563657131, + 563672261, + 563640409 + ], + "samples_ts": [ + 227.088, + 227.082, + 227.095 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T22:31:42Z", + "avg_ns": 3310183779, + "stddev_ns": 609315, + "avg_ts": 38.668549, + "stddev_ts": 0.007086, + "samples_ns": [ + 3309517898, + 3310328552, + 3310704888 + ], + "samples_ts": [ + 38.6763, + 38.6669, + 38.6625 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 208 + }, + { + "timestamp_utc": "2025-12-08T22:32:36.170036+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:31:53Z\",\n \"avg_ns\": 564246593,\n \"stddev_ns\": 168278,\n \"avg_ts\": 226.851183,\n \"stddev_ts\": 0.066974,\n \"samples_ns\": [ 564424064, 564093580, 564222136 ],\n \"samples_ts\": [ 226.78, 226.913, 226.861 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:31:55Z\",\n \"avg_ns\": 13550245227,\n \"stddev_ns\": 11752600,\n \"avg_ts\": 37.785313,\n \"stddev_ts\": 0.032760,\n \"samples_ns\": [ 13546491039, 13540828377, 13563416265 ],\n \"samples_ts\": [ 37.7958, 37.8116, 37.7486 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:31:53Z", + "avg_ns": 564246593, + "stddev_ns": 168278, + "avg_ts": 226.851183, + "stddev_ts": 0.066974, + "samples_ns": [ + 564424064, + 564093580, + 564222136 + ], + "samples_ts": [ + 226.78, + 226.913, + 226.861 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T22:31:55Z", + "avg_ns": 13550245227, + "stddev_ns": 11752600, + "avg_ts": 37.785313, + "stddev_ts": 0.03276, + "samples_ns": [ + 13546491039, + 13540828377, + 13563416265 + ], + "samples_ts": [ + 37.7958, + 37.8116, + 37.7486 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 209 + }, + { + "timestamp_utc": "2025-12-08T22:32:56.528877+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:32:36Z\",\n \"avg_ns\": 2398032224,\n \"stddev_ns\": 334675,\n \"avg_ts\": 213.508393,\n \"stddev_ts\": 0.029799,\n \"samples_ns\": [ 2398327653, 2398100262, 2397668757 ],\n \"samples_ts\": [ 213.482, 213.502, 213.541 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:32:46Z\",\n \"avg_ns\": 3312094621,\n \"stddev_ns\": 1861039,\n \"avg_ts\": 38.646247,\n \"stddev_ts\": 0.021694,\n \"samples_ns\": [ 3310247384, 3312070807, 3313965674 ],\n \"samples_ts\": [ 38.6678, 38.6465, 38.6244 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:32:36Z", + "avg_ns": 2398032224, + "stddev_ns": 334675, + "avg_ts": 213.508393, + "stddev_ts": 0.029799, + "samples_ns": [ + 2398327653, + 2398100262, + 2397668757 + ], + "samples_ts": [ + 213.482, + 213.502, + 213.541 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T22:32:46Z", + "avg_ns": 3312094621, + "stddev_ns": 1861039, + "avg_ts": 38.646247, + "stddev_ts": 0.021694, + "samples_ns": [ + 3310247384, + 3312070807, + 3313965674 + ], + "samples_ts": [ + 38.6678, + 38.6465, + 38.6244 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 210 + }, + { + "timestamp_utc": "2025-12-08T22:33:47.779848+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:32:57Z\",\n \"avg_ns\": 2400175196,\n \"stddev_ns\": 211828,\n \"avg_ts\": 213.317763,\n \"stddev_ts\": 0.018316,\n \"samples_ns\": [ 2400370390, 2399959719, 2400195480 ],\n \"samples_ts\": [ 213.3, 213.337, 213.316 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:33:06Z\",\n \"avg_ns\": 13607104889,\n \"stddev_ns\": 7503836,\n \"avg_ts\": 37.627409,\n \"stddev_ts\": 0.020749,\n \"samples_ns\": [ 13599764648, 13614762266, 13606787753 ],\n \"samples_ts\": [ 37.6477, 37.6062, 37.6283 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:32:57Z", + "avg_ns": 2400175196, + "stddev_ns": 211828, + "avg_ts": 213.317763, + "stddev_ts": 0.018316, + "samples_ns": [ + 2400370390, + 2399959719, + 2400195480 + ], + "samples_ts": [ + 213.3, + 213.337, + 213.316 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T22:33:06Z", + "avg_ns": 13607104889, + "stddev_ns": 7503836, + "avg_ts": 37.627409, + "stddev_ts": 0.020749, + "samples_ns": [ + 13599764648, + 13614762266, + 13606787753 + ], + "samples_ts": [ + 37.6477, + 37.6062, + 37.6283 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 211 + }, + { + "timestamp_utc": "2025-12-08T22:34:00.867971+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:33:48Z\",\n \"avg_ns\": 563897201,\n \"stddev_ns\": 116121,\n \"avg_ts\": 226.991734,\n \"stddev_ts\": 0.045750,\n \"samples_ns\": [ 563828553, 564028406, 563834645 ],\n \"samples_ts\": [ 227.019, 226.939, 227.017 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:33:50Z\",\n \"avg_ns\": 3320355980,\n \"stddev_ns\": 1718568,\n \"avg_ts\": 38.550090,\n \"stddev_ts\": 0.019959,\n \"samples_ns\": [ 3321445837, 3318374863, 3321247240 ],\n \"samples_ts\": [ 38.5374, 38.5731, 38.5397 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:33:48Z", + "avg_ns": 563897201, + "stddev_ns": 116121, + "avg_ts": 226.991734, + "stddev_ts": 0.04575, + "samples_ns": [ + 563828553, + 564028406, + 563834645 + ], + "samples_ts": [ + 227.019, + 226.939, + 227.017 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T22:33:50Z", + "avg_ns": 3320355980, + "stddev_ns": 1718568, + "avg_ts": 38.55009, + "stddev_ts": 0.019959, + "samples_ns": [ + 3321445837, + 3318374863, + 3321247240 + ], + "samples_ts": [ + 38.5374, + 38.5731, + 38.5397 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 212 + }, + { + "timestamp_utc": "2025-12-08T22:34:44.595872+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:34:01Z\",\n \"avg_ns\": 563754205,\n \"stddev_ns\": 62835,\n \"avg_ts\": 227.049306,\n \"stddev_ts\": 0.023430,\n \"samples_ns\": [ 563707477, 563819366, 563735773 ],\n \"samples_ts\": [ 227.068, 227.023, 227.057 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:34:03Z\",\n \"avg_ns\": 13548232880,\n \"stddev_ns\": 1329918,\n \"avg_ts\": 37.790907,\n \"stddev_ts\": 0.003696,\n \"samples_ns\": [ 13546737517, 13548701187, 13549259937 ],\n \"samples_ts\": [ 37.7951, 37.7896, 37.788 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:34:01Z", + "avg_ns": 563754205, + "stddev_ns": 62835, + "avg_ts": 227.049306, + "stddev_ts": 0.02343, + "samples_ns": [ + 563707477, + 563819366, + 563735773 + ], + "samples_ts": [ + 227.068, + 227.023, + 227.057 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T22:34:03Z", + "avg_ns": 13548232880, + "stddev_ns": 1329918, + "avg_ts": 37.790907, + "stddev_ts": 0.003696, + "samples_ns": [ + 13546737517, + 13548701187, + 13549259937 + ], + "samples_ts": [ + 37.7951, + 37.7896, + 37.788 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 213 + }, + { + "timestamp_utc": "2025-12-08T22:35:05.601684+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:34:45Z\",\n \"avg_ns\": 2545117084,\n \"stddev_ns\": 177455,\n \"avg_ts\": 201.169528,\n \"stddev_ts\": 0.013448,\n \"samples_ns\": [ 2545162203, 2545260110, 2544928940 ],\n \"samples_ts\": [ 201.166, 201.158, 201.184 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:34:55Z\",\n \"avg_ns\": 3323541646,\n \"stddev_ns\": 625600,\n \"avg_ts\": 38.513133,\n \"stddev_ts\": 0.007187,\n \"samples_ns\": [ 3323403597, 3323002037, 3324219306 ],\n \"samples_ts\": [ 38.5147, 38.5194, 38.5053 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:34:45Z", + "avg_ns": 2545117084, + "stddev_ns": 177455, + "avg_ts": 201.169528, + "stddev_ts": 0.013448, + "samples_ns": [ + 2545162203, + 2545260110, + 2544928940 + ], + "samples_ts": [ + 201.166, + 201.158, + 201.184 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T22:34:55Z", + "avg_ns": 3323541646, + "stddev_ns": 625600, + "avg_ts": 38.513133, + "stddev_ts": 0.007187, + "samples_ns": [ + 3323403597, + 3323002037, + 3324219306 + ], + "samples_ts": [ + 38.5147, + 38.5194, + 38.5053 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 214 + }, + { + "timestamp_utc": "2025-12-08T22:35:57.257613+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:35:06Z\",\n \"avg_ns\": 2557161312,\n \"stddev_ns\": 483013,\n \"avg_ts\": 200.222023,\n \"stddev_ts\": 0.037819,\n \"samples_ns\": [ 2557650662, 2557148380, 2556684894 ],\n \"samples_ts\": [ 200.184, 200.223, 200.259 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:35:16Z\",\n \"avg_ns\": 13530268650,\n \"stddev_ns\": 459768,\n \"avg_ts\": 37.841082,\n \"stddev_ts\": 0.001244,\n \"samples_ns\": [ 13530735191, 13529849338, 13530221422 ],\n \"samples_ts\": [ 37.8398, 37.8423, 37.8412 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:35:06Z", + "avg_ns": 2557161312, + "stddev_ns": 483013, + "avg_ts": 200.222023, + "stddev_ts": 0.037819, + "samples_ns": [ + 2557650662, + 2557148380, + 2556684894 + ], + "samples_ts": [ + 200.184, + 200.223, + 200.259 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T22:35:16Z", + "avg_ns": 13530268650, + "stddev_ns": 459768, + "avg_ts": 37.841082, + "stddev_ts": 0.001244, + "samples_ns": [ + 13530735191, + 13529849338, + 13530221422 + ], + "samples_ts": [ + 37.8398, + 37.8423, + 37.8412 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 215 + }, + { + "timestamp_utc": "2025-12-08T22:36:10.385022+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:35:57Z\",\n \"avg_ns\": 399063566,\n \"stddev_ns\": 2942920,\n \"avg_ts\": 320.762583,\n \"stddev_ts\": 2.375447,\n \"samples_ns\": [ 395669116, 400897404, 400624179 ],\n \"samples_ts\": [ 323.503, 319.284, 319.501 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:35:59Z\",\n \"avg_ns\": 3566332646,\n \"stddev_ns\": 1081461,\n \"avg_ts\": 35.891214,\n \"stddev_ts\": 0.010884,\n \"samples_ns\": [ 3567350193, 3565196969, 3566450776 ],\n \"samples_ts\": [ 35.881, 35.9026, 35.89 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:35:57Z", + "avg_ns": 399063566, + "stddev_ns": 2942920, + "avg_ts": 320.762583, + "stddev_ts": 2.375447, + "samples_ns": [ + 395669116, + 400897404, + 400624179 + ], + "samples_ts": [ + 323.503, + 319.284, + 319.501 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T22:35:59Z", + "avg_ns": 3566332646, + "stddev_ns": 1081461, + "avg_ts": 35.891214, + "stddev_ts": 0.010884, + "samples_ns": [ + 3567350193, + 3565196969, + 3566450776 + ], + "samples_ts": [ + 35.881, + 35.9026, + 35.89 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 216 + }, + { + "timestamp_utc": "2025-12-08T22:36:56.296033+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:36:11Z\",\n \"avg_ns\": 401043756,\n \"stddev_ns\": 545632,\n \"avg_ts\": 319.167562,\n \"stddev_ts\": 0.434284,\n \"samples_ns\": [ 401337407, 400414602, 401379260 ],\n \"samples_ts\": [ 318.934, 319.669, 318.9 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:36:12Z\",\n \"avg_ns\": 14489742559,\n \"stddev_ns\": 3754896,\n \"avg_ts\": 35.335343,\n \"stddev_ts\": 0.009156,\n \"samples_ns\": [ 14493853374, 14488880946, 14486493357 ],\n \"samples_ts\": [ 35.3253, 35.3374, 35.3433 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:36:11Z", + "avg_ns": 401043756, + "stddev_ns": 545632, + "avg_ts": 319.167562, + "stddev_ts": 0.434284, + "samples_ns": [ + 401337407, + 400414602, + 401379260 + ], + "samples_ts": [ + 318.934, + 319.669, + 318.9 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T22:36:12Z", + "avg_ns": 14489742559, + "stddev_ns": 3754896, + "avg_ts": 35.335343, + "stddev_ts": 0.009156, + "samples_ns": [ + 14493853374, + 14488880946, + 14486493357 + ], + "samples_ts": [ + 35.3253, + 35.3374, + 35.3433 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 217 + }, + { + "timestamp_utc": "2025-12-08T22:37:14.403456+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:36:57Z\",\n \"avg_ns\": 1651252621,\n \"stddev_ns\": 4088233,\n \"avg_ts\": 310.068904,\n \"stddev_ts\": 0.766864,\n \"samples_ns\": [ 1655814539, 1647921098, 1650022227 ],\n \"samples_ts\": [ 309.213, 310.694, 310.299 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:37:03Z\",\n \"avg_ns\": 3563157647,\n \"stddev_ns\": 1254402,\n \"avg_ts\": 35.923196,\n \"stddev_ts\": 0.012630,\n \"samples_ns\": [ 3564602396, 3562368282, 3562502264 ],\n \"samples_ts\": [ 35.9086, 35.9312, 35.9298 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:36:57Z", + "avg_ns": 1651252621, + "stddev_ns": 4088233, + "avg_ts": 310.068904, + "stddev_ts": 0.766864, + "samples_ns": [ + 1655814539, + 1647921098, + 1650022227 + ], + "samples_ts": [ + 309.213, + 310.694, + 310.299 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T22:37:03Z", + "avg_ns": 3563157647, + "stddev_ns": 1254402, + "avg_ts": 35.923196, + "stddev_ts": 0.01263, + "samples_ns": [ + 3564602396, + 3562368282, + 3562502264 + ], + "samples_ts": [ + 35.9086, + 35.9312, + 35.9298 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 218 + }, + { + "timestamp_utc": "2025-12-08T22:38:05.302760+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:37:15Z\",\n \"avg_ns\": 1648509335,\n \"stddev_ns\": 321923,\n \"avg_ts\": 310.583629,\n \"stddev_ts\": 0.059672,\n \"samples_ns\": [ 1648873345, 1648358322, 1648296340 ],\n \"samples_ts\": [ 310.515, 310.612, 310.624 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:37:21Z\",\n \"avg_ns\": 14490346065,\n \"stddev_ns\": 8673311,\n \"avg_ts\": 35.333878,\n \"stddev_ts\": 0.021140,\n \"samples_ns\": [ 14500314505, 14484534501, 14486189190 ],\n \"samples_ts\": [ 35.3096, 35.348, 35.344 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:37:15Z", + "avg_ns": 1648509335, + "stddev_ns": 321923, + "avg_ts": 310.583629, + "stddev_ts": 0.059672, + "samples_ns": [ + 1648873345, + 1648358322, + 1648296340 + ], + "samples_ts": [ + 310.515, + 310.612, + 310.624 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T22:37:21Z", + "avg_ns": 14490346065, + "stddev_ns": 8673311, + "avg_ts": 35.333878, + "stddev_ts": 0.02114, + "samples_ns": [ + 14500314505, + 14484534501, + 14486189190 + ], + "samples_ts": [ + 35.3096, + 35.348, + 35.344 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 219 + }, + { + "timestamp_utc": "2025-12-08T22:38:18.449603+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:38:06Z\",\n \"avg_ns\": 400120720,\n \"stddev_ns\": 91474,\n \"avg_ts\": 319.903463,\n \"stddev_ts\": 0.069558,\n \"samples_ns\": [ 400185616, 400154672, 400021874 ],\n \"samples_ts\": [ 319.852, 319.876, 319.983 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:38:07Z\",\n \"avg_ns\": 3570527238,\n \"stddev_ns\": 2169347,\n \"avg_ts\": 35.849056,\n \"stddev_ts\": 0.021783,\n \"samples_ns\": [ 3572555232, 3568239860, 3570786622 ],\n \"samples_ts\": [ 35.8287, 35.872, 35.8464 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:38:06Z", + "avg_ns": 400120720, + "stddev_ns": 91474, + "avg_ts": 319.903463, + "stddev_ts": 0.069558, + "samples_ns": [ + 400185616, + 400154672, + 400021874 + ], + "samples_ts": [ + 319.852, + 319.876, + 319.983 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T22:38:07Z", + "avg_ns": 3570527238, + "stddev_ns": 2169347, + "avg_ts": 35.849056, + "stddev_ts": 0.021783, + "samples_ns": [ + 3572555232, + 3568239860, + 3570786622 + ], + "samples_ts": [ + 35.8287, + 35.872, + 35.8464 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 220 + }, + { + "timestamp_utc": "2025-12-08T22:39:04.172493+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:38:19Z\",\n \"avg_ns\": 399641661,\n \"stddev_ns\": 425384,\n \"avg_ts\": 320.287170,\n \"stddev_ts\": 0.340786,\n \"samples_ns\": [ 399530352, 400111634, 399282997 ],\n \"samples_ts\": [ 320.376, 319.911, 320.575 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:38:20Z\",\n \"avg_ns\": 14428945320,\n \"stddev_ns\": 81378943,\n \"avg_ts\": 35.484979,\n \"stddev_ts\": 0.199487,\n \"samples_ns\": [ 14384459222, 14379506724, 14522870015 ],\n \"samples_ts\": [ 35.594, 35.6062, 35.2547 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:38:19Z", + "avg_ns": 399641661, + "stddev_ns": 425384, + "avg_ts": 320.28717, + "stddev_ts": 0.340786, + "samples_ns": [ + 399530352, + 400111634, + 399282997 + ], + "samples_ts": [ + 320.376, + 319.911, + 320.575 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T22:38:20Z", + "avg_ns": 14428945320, + "stddev_ns": 81378943, + "avg_ts": 35.484979, + "stddev_ts": 0.199487, + "samples_ns": [ + 14384459222, + 14379506724, + 14522870015 + ], + "samples_ts": [ + 35.594, + 35.6062, + 35.2547 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 221 + }, + { + "timestamp_utc": "2025-12-08T22:39:22.703850+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:39:04Z\",\n \"avg_ns\": 1733523006,\n \"stddev_ns\": 3333272,\n \"avg_ts\": 295.353023,\n \"stddev_ts\": 0.567384,\n \"samples_ns\": [ 1737237701, 1730795013, 1732536306 ],\n \"samples_ts\": [ 294.721, 295.818, 295.521 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:39:11Z\",\n \"avg_ns\": 3577515389,\n \"stddev_ns\": 1658065,\n \"avg_ts\": 35.779027,\n \"stddev_ts\": 0.016586,\n \"samples_ns\": [ 3575629694, 3578171364, 3578745109 ],\n \"samples_ts\": [ 35.7979, 35.7725, 35.7667 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:39:04Z", + "avg_ns": 1733523006, + "stddev_ns": 3333272, + "avg_ts": 295.353023, + "stddev_ts": 0.567384, + "samples_ns": [ + 1737237701, + 1730795013, + 1732536306 + ], + "samples_ts": [ + 294.721, + 295.818, + 295.521 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T22:39:11Z", + "avg_ns": 3577515389, + "stddev_ns": 1658065, + "avg_ts": 35.779027, + "stddev_ts": 0.016586, + "samples_ns": [ + 3575629694, + 3578171364, + 3578745109 + ], + "samples_ts": [ + 35.7979, + 35.7725, + 35.7667 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 222 + }, + { + "timestamp_utc": "2025-12-08T22:40:13.239644+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:39:23Z\",\n \"avg_ns\": 1688562276,\n \"stddev_ns\": 1320434,\n \"avg_ts\": 303.216657,\n \"stddev_ts\": 0.237217,\n \"samples_ns\": [ 1689246714, 1689399975, 1687040139 ],\n \"samples_ts\": [ 303.094, 303.066, 303.49 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:39:30Z\",\n \"avg_ns\": 14316249628,\n \"stddev_ns\": 7518541,\n \"avg_ts\": 35.763563,\n \"stddev_ts\": 0.018779,\n \"samples_ns\": [ 14309899697, 14324551702, 14314297485 ],\n \"samples_ts\": [ 35.7794, 35.7428, 35.7684 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:39:23Z", + "avg_ns": 1688562276, + "stddev_ns": 1320434, + "avg_ts": 303.216657, + "stddev_ts": 0.237217, + "samples_ns": [ + 1689246714, + 1689399975, + 1687040139 + ], + "samples_ts": [ + 303.094, + 303.066, + 303.49 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T22:39:30Z", + "avg_ns": 14316249628, + "stddev_ns": 7518541, + "avg_ts": 35.763563, + "stddev_ts": 0.018779, + "samples_ns": [ + 14309899697, + 14324551702, + 14314297485 + ], + "samples_ts": [ + 35.7794, + 35.7428, + 35.7684 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 223 + }, + { + "timestamp_utc": "2025-12-08T22:40:26.344805+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:40:14Z\",\n \"avg_ns\": 401841795,\n \"stddev_ns\": 976034,\n \"avg_ts\": 318.534570,\n \"stddev_ts\": 0.773656,\n \"samples_ns\": [ 400871974, 402823927, 401829484 ],\n \"samples_ts\": [ 319.304, 317.757, 318.543 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:40:15Z\",\n \"avg_ns\": 3539400962,\n \"stddev_ns\": 2098679,\n \"avg_ts\": 36.164320,\n \"stddev_ts\": 0.021430,\n \"samples_ns\": [ 3541259188, 3537127371, 3539816329 ],\n \"samples_ts\": [ 36.1453, 36.1876, 36.1601 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:40:14Z", + "avg_ns": 401841795, + "stddev_ns": 976034, + "avg_ts": 318.53457, + "stddev_ts": 0.773656, + "samples_ns": [ + 400871974, + 402823927, + 401829484 + ], + "samples_ts": [ + 319.304, + 317.757, + 318.543 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T22:40:15Z", + "avg_ns": 3539400962, + "stddev_ns": 2098679, + "avg_ts": 36.16432, + "stddev_ts": 0.02143, + "samples_ns": [ + 3541259188, + 3537127371, + 3539816329 + ], + "samples_ts": [ + 36.1453, + 36.1876, + 36.1601 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 224 + }, + { + "timestamp_utc": "2025-12-08T22:41:11.941474+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:40:27Z\",\n \"avg_ns\": 405714388,\n \"stddev_ns\": 434489,\n \"avg_ts\": 315.493119,\n \"stddev_ts\": 0.337914,\n \"samples_ns\": [ 405263056, 405750302, 406129806 ],\n \"samples_ts\": [ 315.844, 315.465, 315.17 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:40:28Z\",\n \"avg_ns\": 14379264357,\n \"stddev_ns\": 5314077,\n \"avg_ts\": 35.606832,\n \"stddev_ts\": 0.013153,\n \"samples_ns\": [ 14376276292, 14376118493, 14385398287 ],\n \"samples_ts\": [ 35.6142, 35.6146, 35.5916 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:40:27Z", + "avg_ns": 405714388, + "stddev_ns": 434489, + "avg_ts": 315.493119, + "stddev_ts": 0.337914, + "samples_ns": [ + 405263056, + 405750302, + 406129806 + ], + "samples_ts": [ + 315.844, + 315.465, + 315.17 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T22:40:28Z", + "avg_ns": 14379264357, + "stddev_ns": 5314077, + "avg_ts": 35.606832, + "stddev_ts": 0.013153, + "samples_ns": [ + 14376276292, + 14376118493, + 14385398287 + ], + "samples_ts": [ + 35.6142, + 35.6146, + 35.5916 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 225 + }, + { + "timestamp_utc": "2025-12-08T22:41:32.068755+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:41:12Z\",\n \"avg_ns\": 2136408297,\n \"stddev_ns\": 5607771,\n \"avg_ts\": 239.655666,\n \"stddev_ts\": 0.629799,\n \"samples_ns\": [ 2130069731, 2140722028, 2138433134 ],\n \"samples_ts\": [ 240.368, 239.172, 239.428 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:41:21Z\",\n \"avg_ns\": 3572998207,\n \"stddev_ns\": 1448703,\n \"avg_ts\": 35.824259,\n \"stddev_ts\": 0.014513,\n \"samples_ns\": [ 3573075137, 3574405678, 3571513807 ],\n \"samples_ts\": [ 35.8235, 35.8101, 35.8391 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:41:12Z", + "avg_ns": 2136408297, + "stddev_ns": 5607771, + "avg_ts": 239.655666, + "stddev_ts": 0.629799, + "samples_ns": [ + 2130069731, + 2140722028, + 2138433134 + ], + "samples_ts": [ + 240.368, + 239.172, + 239.428 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T22:41:21Z", + "avg_ns": 3572998207, + "stddev_ns": 1448703, + "avg_ts": 35.824259, + "stddev_ts": 0.014513, + "samples_ns": [ + 3573075137, + 3574405678, + 3571513807 + ], + "samples_ts": [ + 35.8235, + 35.8101, + 35.8391 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 226 + }, + { + "timestamp_utc": "2025-12-08T22:42:25.066217+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:41:32Z\",\n \"avg_ns\": 2163684883,\n \"stddev_ns\": 502305,\n \"avg_ts\": 236.633357,\n \"stddev_ts\": 0.054942,\n \"samples_ns\": [ 2163976895, 2163104875, 2163972879 ],\n \"samples_ts\": [ 236.601, 236.697, 236.602 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:41:41Z\",\n \"avg_ns\": 14499326667,\n \"stddev_ns\": 6195139,\n \"avg_ts\": 35.311989,\n \"stddev_ts\": 0.015084,\n \"samples_ns\": [ 14492525697, 14500813818, 14504640488 ],\n \"samples_ts\": [ 35.3286, 35.3084, 35.299 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:41:32Z", + "avg_ns": 2163684883, + "stddev_ns": 502305, + "avg_ts": 236.633357, + "stddev_ts": 0.054942, + "samples_ns": [ + 2163976895, + 2163104875, + 2163972879 + ], + "samples_ts": [ + 236.601, + 236.697, + 236.602 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T22:41:41Z", + "avg_ns": 14499326667, + "stddev_ns": 6195139, + "avg_ts": 35.311989, + "stddev_ts": 0.015084, + "samples_ns": [ + 14492525697, + 14500813818, + 14504640488 + ], + "samples_ts": [ + 35.3286, + 35.3084, + 35.299 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 227 + }, + { + "timestamp_utc": "2025-12-08T22:42:38.198952+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:42:25Z\",\n \"avg_ns\": 401271337,\n \"stddev_ns\": 1108327,\n \"avg_ts\": 318.987774,\n \"stddev_ts\": 0.880287,\n \"samples_ns\": [ 400307287, 401024427, 402482297 ],\n \"samples_ts\": [ 319.754, 319.183, 318.026 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:42:27Z\",\n \"avg_ns\": 3564152677,\n \"stddev_ns\": 1539669,\n \"avg_ts\": 35.913169,\n \"stddev_ts\": 0.015495,\n \"samples_ns\": [ 3562377615, 3565058079, 3565022339 ],\n \"samples_ts\": [ 35.9311, 35.904, 35.9044 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:42:25Z", + "avg_ns": 401271337, + "stddev_ns": 1108327, + "avg_ts": 318.987774, + "stddev_ts": 0.880287, + "samples_ns": [ + 400307287, + 401024427, + 402482297 + ], + "samples_ts": [ + 319.754, + 319.183, + 318.026 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T22:42:27Z", + "avg_ns": 3564152677, + "stddev_ns": 1539669, + "avg_ts": 35.913169, + "stddev_ts": 0.015495, + "samples_ns": [ + 3562377615, + 3565058079, + 3565022339 + ], + "samples_ts": [ + 35.9311, + 35.904, + 35.9044 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 228 + }, + { + "timestamp_utc": "2025-12-08T22:43:23.660662+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:42:38Z\",\n \"avg_ns\": 401389941,\n \"stddev_ns\": 581833,\n \"avg_ts\": 318.892344,\n \"stddev_ts\": 0.462040,\n \"samples_ns\": [ 400790694, 401427257, 401951873 ],\n \"samples_ts\": [ 319.369, 318.862, 318.446 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:42:40Z\",\n \"avg_ns\": 14341345141,\n \"stddev_ns\": 7939206,\n \"avg_ts\": 35.700982,\n \"stddev_ts\": 0.019764,\n \"samples_ns\": [ 14332448026, 14347699825, 14343887574 ],\n \"samples_ts\": [ 35.7231, 35.6852, 35.6946 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:42:38Z", + "avg_ns": 401389941, + "stddev_ns": 581833, + "avg_ts": 318.892344, + "stddev_ts": 0.46204, + "samples_ns": [ + 400790694, + 401427257, + 401951873 + ], + "samples_ts": [ + 319.369, + 318.862, + 318.446 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T22:42:40Z", + "avg_ns": 14341345141, + "stddev_ns": 7939206, + "avg_ts": 35.700982, + "stddev_ts": 0.019764, + "samples_ns": [ + 14332448026, + 14347699825, + 14343887574 + ], + "samples_ts": [ + 35.7231, + 35.6852, + 35.6946 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 229 + }, + { + "timestamp_utc": "2025-12-08T22:43:41.874311+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:43:24Z\",\n \"avg_ns\": 1693338703,\n \"stddev_ns\": 2357456,\n \"avg_ts\": 302.361636,\n \"stddev_ts\": 0.420907,\n \"samples_ns\": [ 1693404294, 1695662321, 1690949495 ],\n \"samples_ts\": [ 302.35, 301.947, 302.788 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:43:31Z\",\n \"avg_ns\": 3535366903,\n \"stddev_ns\": 662579,\n \"avg_ts\": 36.205578,\n \"stddev_ts\": 0.006785,\n \"samples_ns\": [ 3535355881, 3536034925, 3534709903 ],\n \"samples_ts\": [ 36.2057, 36.1987, 36.2123 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:43:24Z", + "avg_ns": 1693338703, + "stddev_ns": 2357456, + "avg_ts": 302.361636, + "stddev_ts": 0.420907, + "samples_ns": [ + 1693404294, + 1695662321, + 1690949495 + ], + "samples_ts": [ + 302.35, + 301.947, + 302.788 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T22:43:31Z", + "avg_ns": 3535366903, + "stddev_ns": 662579, + "avg_ts": 36.205578, + "stddev_ts": 0.006785, + "samples_ns": [ + 3535355881, + 3536034925, + 3534709903 + ], + "samples_ts": [ + 36.2057, + 36.1987, + 36.2123 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 230 + }, + { + "timestamp_utc": "2025-12-08T22:44:32.318378+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:43:42Z\",\n \"avg_ns\": 1657490929,\n \"stddev_ns\": 833021,\n \"avg_ts\": 308.900687,\n \"stddev_ts\": 0.155254,\n \"samples_ns\": [ 1657535852, 1656636355, 1658300580 ],\n \"samples_ts\": [ 308.892, 309.06, 308.75 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:43:49Z\",\n \"avg_ns\": 14324323591,\n \"stddev_ns\": 17941935,\n \"avg_ts\": 35.743435,\n \"stddev_ts\": 0.044738,\n \"samples_ns\": [ 14345007857, 14314991205, 14312971712 ],\n \"samples_ts\": [ 35.6919, 35.7667, 35.7717 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:43:42Z", + "avg_ns": 1657490929, + "stddev_ns": 833021, + "avg_ts": 308.900687, + "stddev_ts": 0.155254, + "samples_ns": [ + 1657535852, + 1656636355, + 1658300580 + ], + "samples_ts": [ + 308.892, + 309.06, + 308.75 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T22:43:49Z", + "avg_ns": 14324323591, + "stddev_ns": 17941935, + "avg_ts": 35.743435, + "stddev_ts": 0.044738, + "samples_ns": [ + 14345007857, + 14314991205, + 14312971712 + ], + "samples_ts": [ + 35.6919, + 35.7667, + 35.7717 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 231 + }, + { + "timestamp_utc": "2025-12-08T22:44:45.344246+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:44:33Z\",\n \"avg_ns\": 401437615,\n \"stddev_ns\": 184622,\n \"avg_ts\": 318.854070,\n \"stddev_ts\": 0.144881,\n \"samples_ns\": [ 401638854, 401390942, 401283051 ],\n \"samples_ts\": [ 318.694, 318.891, 318.977 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:44:34Z\",\n \"avg_ns\": 3529872060,\n \"stddev_ns\": 2273580,\n \"avg_ts\": 36.261948,\n \"stddev_ts\": 0.023339,\n \"samples_ns\": [ 3532208305, 3527670217, 3529737660 ],\n \"samples_ts\": [ 36.238, 36.2846, 36.2633 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:44:33Z", + "avg_ns": 401437615, + "stddev_ns": 184622, + "avg_ts": 318.85407, + "stddev_ts": 0.144881, + "samples_ns": [ + 401638854, + 401390942, + 401283051 + ], + "samples_ts": [ + 318.694, + 318.891, + 318.977 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T22:44:34Z", + "avg_ns": 3529872060, + "stddev_ns": 2273580, + "avg_ts": 36.261948, + "stddev_ts": 0.023339, + "samples_ns": [ + 3532208305, + 3527670217, + 3529737660 + ], + "samples_ts": [ + 36.238, + 36.2846, + 36.2633 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 232 + }, + { + "timestamp_utc": "2025-12-08T22:45:31.372307+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:44:46Z\",\n \"avg_ns\": 401672490,\n \"stddev_ns\": 639974,\n \"avg_ts\": 318.668117,\n \"stddev_ts\": 0.507298,\n \"samples_ns\": [ 402355050, 401575649, 401086772 ],\n \"samples_ts\": [ 318.127, 318.744, 319.133 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:44:47Z\",\n \"avg_ns\": 14528767625,\n \"stddev_ns\": 3883183,\n \"avg_ts\": 35.240430,\n \"stddev_ts\": 0.009415,\n \"samples_ns\": [ 14529120472, 14524721937, 14532460467 ],\n \"samples_ts\": [ 35.2396, 35.2502, 35.2315 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:44:46Z", + "avg_ns": 401672490, + "stddev_ns": 639974, + "avg_ts": 318.668117, + "stddev_ts": 0.507298, + "samples_ns": [ + 402355050, + 401575649, + 401086772 + ], + "samples_ts": [ + 318.127, + 318.744, + 319.133 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T22:44:47Z", + "avg_ns": 14528767625, + "stddev_ns": 3883183, + "avg_ts": 35.24043, + "stddev_ts": 0.009415, + "samples_ns": [ + 14529120472, + 14524721937, + 14532460467 + ], + "samples_ts": [ + 35.2396, + 35.2502, + 35.2315 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 233 + }, + { + "timestamp_utc": "2025-12-08T22:45:49.878258+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:45:32Z\",\n \"avg_ns\": 1762214829,\n \"stddev_ns\": 1440271,\n \"avg_ts\": 290.543593,\n \"stddev_ts\": 0.237330,\n \"samples_ns\": [ 1762578705, 1763437012, 1760628772 ],\n \"samples_ts\": [ 290.483, 290.342, 290.805 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:45:39Z\",\n \"avg_ns\": 3552362399,\n \"stddev_ns\": 1718225,\n \"avg_ts\": 36.032365,\n \"stddev_ts\": 0.017403,\n \"samples_ns\": [ 3554301833, 3551745021, 3551040345 ],\n \"samples_ts\": [ 36.0127, 36.0386, 36.0458 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:45:32Z", + "avg_ns": 1762214829, + "stddev_ns": 1440271, + "avg_ts": 290.543593, + "stddev_ts": 0.23733, + "samples_ns": [ + 1762578705, + 1763437012, + 1760628772 + ], + "samples_ts": [ + 290.483, + 290.342, + 290.805 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T22:45:39Z", + "avg_ns": 3552362399, + "stddev_ns": 1718225, + "avg_ts": 36.032365, + "stddev_ts": 0.017403, + "samples_ns": [ + 3554301833, + 3551745021, + 3551040345 + ], + "samples_ts": [ + 36.0127, + 36.0386, + 36.0458 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 234 + }, + { + "timestamp_utc": "2025-12-08T22:46:41.828204+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:45:50Z\",\n \"avg_ns\": 1731208044,\n \"stddev_ns\": 1012718,\n \"avg_ts\": 295.747307,\n \"stddev_ts\": 0.172844,\n \"samples_ns\": [ 1731102481, 1732268551, 1730253101 ],\n \"samples_ts\": [ 295.765, 295.566, 295.91 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:45:57Z\",\n \"avg_ns\": 14714337904,\n \"stddev_ns\": 3345273,\n \"avg_ts\": 34.795994,\n \"stddev_ts\": 0.007906,\n \"samples_ns\": [ 14715496303, 14710569704, 14716947706 ],\n \"samples_ts\": [ 34.7933, 34.8049, 34.7898 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:45:50Z", + "avg_ns": 1731208044, + "stddev_ns": 1012718, + "avg_ts": 295.747307, + "stddev_ts": 0.172844, + "samples_ns": [ + 1731102481, + 1732268551, + 1730253101 + ], + "samples_ts": [ + 295.765, + 295.566, + 295.91 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T22:45:57Z", + "avg_ns": 14714337904, + "stddev_ns": 3345273, + "avg_ts": 34.795994, + "stddev_ts": 0.007906, + "samples_ns": [ + 14715496303, + 14710569704, + 14716947706 + ], + "samples_ts": [ + 34.7933, + 34.8049, + 34.7898 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 235 + }, + { + "timestamp_utc": "2025-12-08T22:46:54.961130+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:46:42Z\",\n \"avg_ns\": 400649470,\n \"stddev_ns\": 399343,\n \"avg_ts\": 319.481476,\n \"stddev_ts\": 0.317773,\n \"samples_ns\": [ 400970290, 400203614, 400774508 ],\n \"samples_ts\": [ 319.226, 319.837, 319.382 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:46:44Z\",\n \"avg_ns\": 3562815161,\n \"stddev_ns\": 500708,\n \"avg_ts\": 35.926647,\n \"stddev_ts\": 0.005013,\n \"samples_ns\": [ 3562292499, 3563282074, 3562870911 ],\n \"samples_ts\": [ 35.9319, 35.9219, 35.9261 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:46:42Z", + "avg_ns": 400649470, + "stddev_ns": 399343, + "avg_ts": 319.481476, + "stddev_ts": 0.317773, + "samples_ns": [ + 400970290, + 400203614, + 400774508 + ], + "samples_ts": [ + 319.226, + 319.837, + 319.382 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T22:46:44Z", + "avg_ns": 3562815161, + "stddev_ns": 500708, + "avg_ts": 35.926647, + "stddev_ts": 0.005013, + "samples_ns": [ + 3562292499, + 3563282074, + 3562870911 + ], + "samples_ts": [ + 35.9319, + 35.9219, + 35.9261 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 236 + }, + { + "timestamp_utc": "2025-12-08T22:47:40.849616+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:46:55Z\",\n \"avg_ns\": 401136603,\n \"stddev_ns\": 659986,\n \"avg_ts\": 319.093870,\n \"stddev_ts\": 0.524673,\n \"samples_ns\": [ 400954984, 400586442, 401868383 ],\n \"samples_ts\": [ 319.238, 319.532, 318.512 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:46:57Z\",\n \"avg_ns\": 14475227727,\n \"stddev_ns\": 2370802,\n \"avg_ts\": 35.370774,\n \"stddev_ts\": 0.005785,\n \"samples_ns\": [ 14474583059, 14477851046, 14473249077 ],\n \"samples_ts\": [ 35.3723, 35.3644, 35.3756 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:46:55Z", + "avg_ns": 401136603, + "stddev_ns": 659986, + "avg_ts": 319.09387, + "stddev_ts": 0.524673, + "samples_ns": [ + 400954984, + 400586442, + 401868383 + ], + "samples_ts": [ + 319.238, + 319.532, + 318.512 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T22:46:57Z", + "avg_ns": 14475227727, + "stddev_ns": 2370802, + "avg_ts": 35.370774, + "stddev_ts": 0.005785, + "samples_ns": [ + 14474583059, + 14477851046, + 14473249077 + ], + "samples_ts": [ + 35.3723, + 35.3644, + 35.3756 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 237 + }, + { + "timestamp_utc": "2025-12-08T22:48:00.292365+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:47:41Z\",\n \"avg_ns\": 2002254647,\n \"stddev_ns\": 14240713,\n \"avg_ts\": 255.720389,\n \"stddev_ts\": 1.826259,\n \"samples_ns\": [ 1985811325, 2010382622, 2010569995 ],\n \"samples_ts\": [ 257.829, 254.678, 254.654 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:47:49Z\",\n \"avg_ns\": 3537087592,\n \"stddev_ns\": 4654854,\n \"avg_ts\": 36.188006,\n \"stddev_ts\": 0.047590,\n \"samples_ns\": [ 3535371223, 3533535030, 3542356524 ],\n \"samples_ts\": [ 36.2055, 36.2243, 36.1341 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:47:41Z", + "avg_ns": 2002254647, + "stddev_ns": 14240713, + "avg_ts": 255.720389, + "stddev_ts": 1.826259, + "samples_ns": [ + 1985811325, + 2010382622, + 2010569995 + ], + "samples_ts": [ + 257.829, + 254.678, + 254.654 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T22:47:49Z", + "avg_ns": 3537087592, + "stddev_ns": 4654854, + "avg_ts": 36.188006, + "stddev_ts": 0.04759, + "samples_ns": [ + 3535371223, + 3533535030, + 3542356524 + ], + "samples_ts": [ + 36.2055, + 36.2243, + 36.1341 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 238 + }, + { + "timestamp_utc": "2025-12-08T22:48:52.736484+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:48:01Z\",\n \"avg_ns\": 2019492322,\n \"stddev_ns\": 1364870,\n \"avg_ts\": 253.529142,\n \"stddev_ts\": 0.171321,\n \"samples_ns\": [ 2020311486, 2020247893, 2017917588 ],\n \"samples_ts\": [ 253.426, 253.434, 253.727 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:48:09Z\",\n \"avg_ns\": 14507081266,\n \"stddev_ns\": 9590742,\n \"avg_ts\": 35.293119,\n \"stddev_ts\": 0.023336,\n \"samples_ns\": [ 14496472692, 14509635746, 14515135361 ],\n \"samples_ts\": [ 35.3189, 35.2869, 35.2735 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:48:01Z", + "avg_ns": 2019492322, + "stddev_ns": 1364870, + "avg_ts": 253.529142, + "stddev_ts": 0.171321, + "samples_ns": [ + 2020311486, + 2020247893, + 2017917588 + ], + "samples_ts": [ + 253.426, + 253.434, + 253.727 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T22:48:09Z", + "avg_ns": 14507081266, + "stddev_ns": 9590742, + "avg_ts": 35.293119, + "stddev_ts": 0.023336, + "samples_ns": [ + 14496472692, + 14509635746, + 14515135361 + ], + "samples_ts": [ + 35.3189, + 35.2869, + 35.2735 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 239 + }, + { + "timestamp_utc": "2025-12-08T22:49:05.854914+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:48:53Z\",\n \"avg_ns\": 401381949,\n \"stddev_ns\": 522136,\n \"avg_ts\": 318.898606,\n \"stddev_ts\": 0.414533,\n \"samples_ns\": [ 401651811, 401713071, 400780967 ],\n \"samples_ts\": [ 318.684, 318.635, 319.376 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:48:55Z\",\n \"avg_ns\": 3561530794,\n \"stddev_ns\": 2007078,\n \"avg_ts\": 35.939610,\n \"stddev_ts\": 0.020248,\n \"samples_ns\": [ 3559344400, 3563286988, 3561960995 ],\n \"samples_ts\": [ 35.9617, 35.9219, 35.9353 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:48:53Z", + "avg_ns": 401381949, + "stddev_ns": 522136, + "avg_ts": 318.898606, + "stddev_ts": 0.414533, + "samples_ns": [ + 401651811, + 401713071, + 400780967 + ], + "samples_ts": [ + 318.684, + 318.635, + 319.376 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T22:48:55Z", + "avg_ns": 3561530794, + "stddev_ns": 2007078, + "avg_ts": 35.93961, + "stddev_ts": 0.020248, + "samples_ns": [ + 3559344400, + 3563286988, + 3561960995 + ], + "samples_ts": [ + 35.9617, + 35.9219, + 35.9353 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 240 + }, + { + "timestamp_utc": "2025-12-08T22:49:51.732685+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:49:06Z\",\n \"avg_ns\": 401357507,\n \"stddev_ns\": 361469,\n \"avg_ts\": 318.917838,\n \"stddev_ts\": 0.286379,\n \"samples_ns\": [ 401394156, 400980226, 401698141 ],\n \"samples_ts\": [ 318.889, 319.218, 318.647 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:49:08Z\",\n \"avg_ns\": 14477614561,\n \"stddev_ns\": 13250788,\n \"avg_ts\": 35.364962,\n \"stddev_ts\": 0.032350,\n \"samples_ns\": [ 14492904270, 14470457063, 14469482351 ],\n \"samples_ts\": [ 35.3276, 35.3824, 35.3848 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:49:06Z", + "avg_ns": 401357507, + "stddev_ns": 361469, + "avg_ts": 318.917838, + "stddev_ts": 0.286379, + "samples_ns": [ + 401394156, + 400980226, + 401698141 + ], + "samples_ts": [ + 318.889, + 319.218, + 318.647 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T22:49:08Z", + "avg_ns": 14477614561, + "stddev_ns": 13250788, + "avg_ts": 35.364962, + "stddev_ts": 0.03235, + "samples_ns": [ + 14492904270, + 14470457063, + 14469482351 + ], + "samples_ts": [ + 35.3276, + 35.3824, + 35.3848 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 241 + }, + { + "timestamp_utc": "2025-12-08T22:50:09.899006+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:49:52Z\",\n \"avg_ns\": 1651162876,\n \"stddev_ns\": 1465153,\n \"avg_ts\": 310.084654,\n \"stddev_ts\": 0.275122,\n \"samples_ns\": [ 1649565676, 1652442866, 1651480087 ],\n \"samples_ts\": [ 310.385, 309.844, 310.025 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:49:59Z\",\n \"avg_ns\": 3574144513,\n \"stddev_ns\": 3847290,\n \"avg_ts\": 35.812793,\n \"stddev_ts\": 0.038568,\n \"samples_ns\": [ 3569822089, 3577194055, 3575417395 ],\n \"samples_ts\": [ 35.8561, 35.7822, 35.8 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:49:52Z", + "avg_ns": 1651162876, + "stddev_ns": 1465153, + "avg_ts": 310.084654, + "stddev_ts": 0.275122, + "samples_ns": [ + 1649565676, + 1652442866, + 1651480087 + ], + "samples_ts": [ + 310.385, + 309.844, + 310.025 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T22:49:59Z", + "avg_ns": 3574144513, + "stddev_ns": 3847290, + "avg_ts": 35.812793, + "stddev_ts": 0.038568, + "samples_ns": [ + 3569822089, + 3577194055, + 3575417395 + ], + "samples_ts": [ + 35.8561, + 35.7822, + 35.8 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 242 + }, + { + "timestamp_utc": "2025-12-08T22:51:01.054672+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:50:10Z\",\n \"avg_ns\": 1670409952,\n \"stddev_ns\": 1546610,\n \"avg_ts\": 306.511759,\n \"stddev_ts\": 0.283835,\n \"samples_ns\": [ 1671503876, 1671084806, 1668641175 ],\n \"samples_ts\": [ 306.311, 306.388, 306.836 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:50:17Z\",\n \"avg_ns\": 14543187261,\n \"stddev_ns\": 15615606,\n \"avg_ts\": 35.205515,\n \"stddev_ts\": 0.037777,\n \"samples_ns\": [ 14561214168, 14534498790, 14533848826 ],\n \"samples_ts\": [ 35.1619, 35.2265, 35.2281 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:50:10Z", + "avg_ns": 1670409952, + "stddev_ns": 1546610, + "avg_ts": 306.511759, + "stddev_ts": 0.283835, + "samples_ns": [ + 1671503876, + 1671084806, + 1668641175 + ], + "samples_ts": [ + 306.311, + 306.388, + 306.836 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T22:50:17Z", + "avg_ns": 14543187261, + "stddev_ns": 15615606, + "avg_ts": 35.205515, + "stddev_ts": 0.037777, + "samples_ns": [ + 14561214168, + 14534498790, + 14533848826 + ], + "samples_ts": [ + 35.1619, + 35.2265, + 35.2281 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 243 + }, + { + "timestamp_utc": "2025-12-08T22:51:14.173606+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:51:01Z\",\n \"avg_ns\": 400845731,\n \"stddev_ns\": 761307,\n \"avg_ts\": 319.325610,\n \"stddev_ts\": 0.605824,\n \"samples_ns\": [ 400448239, 400365442, 401723512 ],\n \"samples_ts\": [ 319.642, 319.708, 318.627 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:51:03Z\",\n \"avg_ns\": 3557357116,\n \"stddev_ns\": 927598,\n \"avg_ts\": 35.981770,\n \"stddev_ts\": 0.009362,\n \"samples_ns\": [ 3558411070, 3556675972, 3556984307 ],\n \"samples_ts\": [ 35.9711, 35.9887, 35.9855 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:51:01Z", + "avg_ns": 400845731, + "stddev_ns": 761307, + "avg_ts": 319.32561, + "stddev_ts": 0.605824, + "samples_ns": [ + 400448239, + 400365442, + 401723512 + ], + "samples_ts": [ + 319.642, + 319.708, + 318.627 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T22:51:03Z", + "avg_ns": 3557357116, + "stddev_ns": 927598, + "avg_ts": 35.98177, + "stddev_ts": 0.009362, + "samples_ns": [ + 3558411070, + 3556675972, + 3556984307 + ], + "samples_ts": [ + 35.9711, + 35.9887, + 35.9855 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 244 + }, + { + "timestamp_utc": "2025-12-08T22:52:00.046255+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:51:14Z\",\n \"avg_ns\": 399856629,\n \"stddev_ns\": 558410,\n \"avg_ts\": 320.115153,\n \"stddev_ts\": 0.446772,\n \"samples_ns\": [ 399295969, 399861883, 400412036 ],\n \"samples_ts\": [ 320.564, 320.111, 319.671 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:51:16Z\",\n \"avg_ns\": 14477994291,\n \"stddev_ns\": 8171160,\n \"avg_ts\": 35.364022,\n \"stddev_ts\": 0.019953,\n \"samples_ns\": [ 14474833391, 14487273728, 14471875754 ],\n \"samples_ts\": [ 35.3717, 35.3414, 35.379 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:51:14Z", + "avg_ns": 399856629, + "stddev_ns": 558410, + "avg_ts": 320.115153, + "stddev_ts": 0.446772, + "samples_ns": [ + 399295969, + 399861883, + 400412036 + ], + "samples_ts": [ + 320.564, + 320.111, + 319.671 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T22:51:16Z", + "avg_ns": 14477994291, + "stddev_ns": 8171160, + "avg_ts": 35.364022, + "stddev_ts": 0.019953, + "samples_ns": [ + 14474833391, + 14487273728, + 14471875754 + ], + "samples_ts": [ + 35.3717, + 35.3414, + 35.379 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 245 + }, + { + "timestamp_utc": "2025-12-08T22:52:18.291669+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:52:00Z\",\n \"avg_ns\": 1688604178,\n \"stddev_ns\": 160158,\n \"avg_ts\": 303.209011,\n \"stddev_ts\": 0.028759,\n \"samples_ns\": [ 1688751021, 1688433398, 1688628115 ],\n \"samples_ts\": [ 303.183, 303.24, 303.205 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:52:07Z\",\n \"avg_ns\": 3535222407,\n \"stddev_ns\": 608021,\n \"avg_ts\": 36.207058,\n \"stddev_ts\": 0.006167,\n \"samples_ns\": [ 3535911413, 3534796829, 3534958981 ],\n \"samples_ts\": [ 36.2, 36.2114, 36.2098 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:52:00Z", + "avg_ns": 1688604178, + "stddev_ns": 160158, + "avg_ts": 303.209011, + "stddev_ts": 0.028759, + "samples_ns": [ + 1688751021, + 1688433398, + 1688628115 + ], + "samples_ts": [ + 303.183, + 303.24, + 303.205 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T22:52:07Z", + "avg_ns": 3535222407, + "stddev_ns": 608021, + "avg_ts": 36.207058, + "stddev_ts": 0.006167, + "samples_ns": [ + 3535911413, + 3534796829, + 3534958981 + ], + "samples_ts": [ + 36.2, + 36.2114, + 36.2098 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 246 + }, + { + "timestamp_utc": "2025-12-08T22:53:09.501396+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:52:19Z\",\n \"avg_ns\": 1689042677,\n \"stddev_ns\": 1647668,\n \"avg_ts\": 303.130484,\n \"stddev_ts\": 0.295548,\n \"samples_ns\": [ 1690803032, 1688786187, 1687538813 ],\n \"samples_ts\": [ 302.815, 303.176, 303.4 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:52:25Z\",\n \"avg_ns\": 14532976369,\n \"stddev_ns\": 2342285,\n \"avg_ts\": 35.230224,\n \"stddev_ts\": 0.005671,\n \"samples_ns\": [ 14532996744, 14530627067, 14535305297 ],\n \"samples_ts\": [ 35.2302, 35.2359, 35.2246 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:52:19Z", + "avg_ns": 1689042677, + "stddev_ns": 1647668, + "avg_ts": 303.130484, + "stddev_ts": 0.295548, + "samples_ns": [ + 1690803032, + 1688786187, + 1687538813 + ], + "samples_ts": [ + 302.815, + 303.176, + 303.4 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T22:52:25Z", + "avg_ns": 14532976369, + "stddev_ns": 2342285, + "avg_ts": 35.230224, + "stddev_ts": 0.005671, + "samples_ns": [ + 14532996744, + 14530627067, + 14535305297 + ], + "samples_ts": [ + 35.2302, + 35.2359, + 35.2246 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 247 + }, + { + "timestamp_utc": "2025-12-08T22:53:22.618477+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:53:10Z\",\n \"avg_ns\": 399855352,\n \"stddev_ns\": 247578,\n \"avg_ts\": 320.115841,\n \"stddev_ts\": 0.197605,\n \"samples_ns\": [ 399922514, 399581954, 400061589 ],\n \"samples_ts\": [ 320.062, 320.335, 319.951 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:53:11Z\",\n \"avg_ns\": 3560851759,\n \"stddev_ns\": 1948218,\n \"avg_ts\": 35.946463,\n \"stddev_ts\": 0.019663,\n \"samples_ns\": [ 3561665392, 3562260293, 3558629593 ],\n \"samples_ts\": [ 35.9382, 35.9322, 35.9689 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:53:10Z", + "avg_ns": 399855352, + "stddev_ns": 247578, + "avg_ts": 320.115841, + "stddev_ts": 0.197605, + "samples_ns": [ + 399922514, + 399581954, + 400061589 + ], + "samples_ts": [ + 320.062, + 320.335, + 319.951 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T22:53:11Z", + "avg_ns": 3560851759, + "stddev_ns": 1948218, + "avg_ts": 35.946463, + "stddev_ts": 0.019663, + "samples_ns": [ + 3561665392, + 3562260293, + 3558629593 + ], + "samples_ts": [ + 35.9382, + 35.9322, + 35.9689 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 248 + }, + { + "timestamp_utc": "2025-12-08T22:54:08.616213+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:53:23Z\",\n \"avg_ns\": 400592440,\n \"stddev_ns\": 408123,\n \"avg_ts\": 319.526969,\n \"stddev_ts\": 0.324963,\n \"samples_ns\": [ 401059847, 400406821, 400310653 ],\n \"samples_ts\": [ 319.154, 319.675, 319.752 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:53:24Z\",\n \"avg_ns\": 14518082103,\n \"stddev_ns\": 2422018,\n \"avg_ts\": 35.266367,\n \"stddev_ts\": 0.005876,\n \"samples_ns\": [ 14516732445, 14520874826, 14516639039 ],\n \"samples_ts\": [ 35.2696, 35.2596, 35.2699 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:53:23Z", + "avg_ns": 400592440, + "stddev_ns": 408123, + "avg_ts": 319.526969, + "stddev_ts": 0.324963, + "samples_ns": [ + 401059847, + 400406821, + 400310653 + ], + "samples_ts": [ + 319.154, + 319.675, + 319.752 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T22:53:24Z", + "avg_ns": 14518082103, + "stddev_ns": 2422018, + "avg_ts": 35.266367, + "stddev_ts": 0.005876, + "samples_ns": [ + 14516732445, + 14520874826, + 14516639039 + ], + "samples_ts": [ + 35.2696, + 35.2596, + 35.2699 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 249 + }, + { + "timestamp_utc": "2025-12-08T22:54:28.757204+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:54:09Z\",\n \"avg_ns\": 2153541676,\n \"stddev_ns\": 261927,\n \"avg_ts\": 237.747897,\n \"stddev_ts\": 0.028915,\n \"samples_ns\": [ 2153830152, 2153318748, 2153476128 ],\n \"samples_ts\": [ 237.716, 237.773, 237.755 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:54:17Z\",\n \"avg_ns\": 3560145331,\n \"stddev_ns\": 1390845,\n \"avg_ts\": 35.953592,\n \"stddev_ts\": 0.014033,\n \"samples_ns\": [ 3560055605, 3558802804, 3561577585 ],\n \"samples_ts\": [ 35.9545, 35.9672, 35.9391 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:54:09Z", + "avg_ns": 2153541676, + "stddev_ns": 261927, + "avg_ts": 237.747897, + "stddev_ts": 0.028915, + "samples_ns": [ + 2153830152, + 2153318748, + 2153476128 + ], + "samples_ts": [ + 237.716, + 237.773, + 237.755 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T22:54:17Z", + "avg_ns": 3560145331, + "stddev_ns": 1390845, + "avg_ts": 35.953592, + "stddev_ts": 0.014033, + "samples_ns": [ + 3560055605, + 3558802804, + 3561577585 + ], + "samples_ts": [ + 35.9545, + 35.9672, + 35.9391 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 250 + }, + { + "timestamp_utc": "2025-12-08T22:55:21.311020+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:54:29Z\",\n \"avg_ns\": 2046334305,\n \"stddev_ns\": 1028246,\n \"avg_ts\": 250.203539,\n \"stddev_ts\": 0.125468,\n \"samples_ns\": [ 2045373761, 2047415589, 2046213567 ],\n \"samples_ts\": [ 250.321, 250.071, 250.218 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:54:37Z\",\n \"avg_ns\": 14516411442,\n \"stddev_ns\": 1285940,\n \"avg_ts\": 35.270425,\n \"stddev_ts\": 0.003125,\n \"samples_ns\": [ 14514927872, 14517207150, 14517099304 ],\n \"samples_ts\": [ 35.274, 35.2685, 35.2688 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:54:29Z", + "avg_ns": 2046334305, + "stddev_ns": 1028246, + "avg_ts": 250.203539, + "stddev_ts": 0.125468, + "samples_ns": [ + 2045373761, + 2047415589, + 2046213567 + ], + "samples_ts": [ + 250.321, + 250.071, + 250.218 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T22:54:37Z", + "avg_ns": 14516411442, + "stddev_ns": 1285940, + "avg_ts": 35.270425, + "stddev_ts": 0.003125, + "samples_ns": [ + 14514927872, + 14517207150, + 14517099304 + ], + "samples_ts": [ + 35.274, + 35.2685, + 35.2688 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 251 + }, + { + "timestamp_utc": "2025-12-08T22:55:35.345326+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:55:22Z\",\n \"avg_ns\": 318581690,\n \"stddev_ns\": 624327,\n \"avg_ts\": 401.781807,\n \"stddev_ts\": 0.786201,\n \"samples_ns\": [ 318281879, 318164100, 319299092 ],\n \"samples_ts\": [ 402.159, 402.308, 400.878 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:55:23Z\",\n \"avg_ns\": 3971277633,\n \"stddev_ns\": 15270705,\n \"avg_ts\": 32.231758,\n \"stddev_ts\": 0.123704,\n \"samples_ns\": [ 3988623867, 3965347070, 3959861963 ],\n \"samples_ts\": [ 32.0913, 32.2796, 32.3244 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:55:22Z", + "avg_ns": 318581690, + "stddev_ns": 624327, + "avg_ts": 401.781807, + "stddev_ts": 0.786201, + "samples_ns": [ + 318281879, + 318164100, + 319299092 + ], + "samples_ts": [ + 402.159, + 402.308, + 400.878 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T22:55:23Z", + "avg_ns": 3971277633, + "stddev_ns": 15270705, + "avg_ts": 32.231758, + "stddev_ts": 0.123704, + "samples_ns": [ + 3988623867, + 3965347070, + 3959861963 + ], + "samples_ts": [ + 32.0913, + 32.2796, + 32.3244 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 252 + }, + { + "timestamp_utc": "2025-12-08T22:56:26.254476+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:55:36Z\",\n \"avg_ns\": 319531297,\n \"stddev_ns\": 727287,\n \"avg_ts\": 400.588121,\n \"stddev_ts\": 0.911515,\n \"samples_ns\": [ 319468031, 318837709, 320288151 ],\n \"samples_ts\": [ 400.666, 401.458, 399.64 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:55:37Z\",\n \"avg_ns\": 16255682881,\n \"stddev_ns\": 12822848,\n \"avg_ts\": 31.496690,\n \"stddev_ts\": 0.024840,\n \"samples_ns\": [ 16253476060, 16269465917, 16244106666 ],\n \"samples_ts\": [ 31.501, 31.47, 31.5191 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:55:36Z", + "avg_ns": 319531297, + "stddev_ns": 727287, + "avg_ts": 400.588121, + "stddev_ts": 0.911515, + "samples_ns": [ + 319468031, + 318837709, + 320288151 + ], + "samples_ts": [ + 400.666, + 401.458, + 399.64 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T22:55:37Z", + "avg_ns": 16255682881, + "stddev_ns": 12822848, + "avg_ts": 31.49669, + "stddev_ts": 0.02484, + "samples_ns": [ + 16253476060, + 16269465917, + 16244106666 + ], + "samples_ts": [ + 31.501, + 31.47, + 31.5191 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 253 + }, + { + "timestamp_utc": "2025-12-08T22:56:44.367401+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:56:27Z\",\n \"avg_ns\": 1322339082,\n \"stddev_ns\": 5207280,\n \"avg_ts\": 387.196677,\n \"stddev_ts\": 1.526247,\n \"samples_ns\": [ 1316738434, 1327033873, 1323244940 ],\n \"samples_ts\": [ 388.84, 385.823, 386.928 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:56:32Z\",\n \"avg_ns\": 3978839453,\n \"stddev_ns\": 13199361,\n \"avg_ts\": 32.170420,\n \"stddev_ts\": 0.106526,\n \"samples_ns\": [ 3969872027, 3993995755, 3972650579 ],\n \"samples_ts\": [ 32.2429, 32.0481, 32.2203 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:56:27Z", + "avg_ns": 1322339082, + "stddev_ns": 5207280, + "avg_ts": 387.196677, + "stddev_ts": 1.526247, + "samples_ns": [ + 1316738434, + 1327033873, + 1323244940 + ], + "samples_ts": [ + 388.84, + 385.823, + 386.928 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T22:56:32Z", + "avg_ns": 3978839453, + "stddev_ns": 13199361, + "avg_ts": 32.17042, + "stddev_ts": 0.106526, + "samples_ns": [ + 3969872027, + 3993995755, + 3972650579 + ], + "samples_ts": [ + 32.2429, + 32.0481, + 32.2203 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 254 + }, + { + "timestamp_utc": "2025-12-08T22:57:39.191798+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:56:45Z\",\n \"avg_ns\": 1336839738,\n \"stddev_ns\": 1870995,\n \"avg_ts\": 382.993304,\n \"stddev_ts\": 0.535692,\n \"samples_ns\": [ 1338878960, 1336436950, 1335203305 ],\n \"samples_ts\": [ 382.409, 383.108, 383.462 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:56:50Z\",\n \"avg_ns\": 16199441218,\n \"stddev_ns\": 24722980,\n \"avg_ts\": 31.606077,\n \"stddev_ts\": 0.048212,\n \"samples_ns\": [ 16226443163, 16193963002, 16177917491 ],\n \"samples_ts\": [ 31.5534, 31.6167, 31.6481 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:56:45Z", + "avg_ns": 1336839738, + "stddev_ns": 1870995, + "avg_ts": 382.993304, + "stddev_ts": 0.535692, + "samples_ns": [ + 1338878960, + 1336436950, + 1335203305 + ], + "samples_ts": [ + 382.409, + 383.108, + 383.462 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T22:56:50Z", + "avg_ns": 16199441218, + "stddev_ns": 24722980, + "avg_ts": 31.606077, + "stddev_ts": 0.048212, + "samples_ns": [ + 16226443163, + 16193963002, + 16177917491 + ], + "samples_ts": [ + 31.5534, + 31.6167, + 31.6481 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 255 + }, + { + "timestamp_utc": "2025-12-08T22:57:53.276206+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:57:39Z\",\n \"avg_ns\": 321178872,\n \"stddev_ns\": 2127469,\n \"avg_ts\": 398.543442,\n \"stddev_ts\": 2.632014,\n \"samples_ns\": [ 319514336, 320446494, 323575786 ],\n \"samples_ts\": [ 400.608, 399.443, 395.58 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:57:41Z\",\n \"avg_ns\": 3980463563,\n \"stddev_ns\": 6803366,\n \"avg_ts\": 32.157121,\n \"stddev_ts\": 0.054906,\n \"samples_ns\": [ 3977205473, 3988282639, 3975902579 ],\n \"samples_ts\": [ 32.1834, 32.094, 32.1939 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:57:39Z", + "avg_ns": 321178872, + "stddev_ns": 2127469, + "avg_ts": 398.543442, + "stddev_ts": 2.632014, + "samples_ns": [ + 319514336, + 320446494, + 323575786 + ], + "samples_ts": [ + 400.608, + 399.443, + 395.58 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T22:57:41Z", + "avg_ns": 3980463563, + "stddev_ns": 6803366, + "avg_ts": 32.157121, + "stddev_ts": 0.054906, + "samples_ns": [ + 3977205473, + 3988282639, + 3975902579 + ], + "samples_ts": [ + 32.1834, + 32.094, + 32.1939 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 256 + }, + { + "timestamp_utc": "2025-12-08T22:58:44.039882+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:57:54Z\",\n \"avg_ns\": 319998315,\n \"stddev_ns\": 929326,\n \"avg_ts\": 400.004350,\n \"stddev_ts\": 1.159660,\n \"samples_ns\": [ 319662145, 319284216, 321048586 ],\n \"samples_ts\": [ 400.423, 400.897, 398.694 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:57:55Z\",\n \"avg_ns\": 16198273674,\n \"stddev_ns\": 2435302,\n \"avg_ts\": 31.608307,\n \"stddev_ts\": 0.004752,\n \"samples_ns\": [ 16200999580, 16196312576, 16197508866 ],\n \"samples_ts\": [ 31.603, 31.6121, 31.6098 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:57:54Z", + "avg_ns": 319998315, + "stddev_ns": 929326, + "avg_ts": 400.00435, + "stddev_ts": 1.15966, + "samples_ns": [ + 319662145, + 319284216, + 321048586 + ], + "samples_ts": [ + 400.423, + 400.897, + 398.694 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T22:57:55Z", + "avg_ns": 16198273674, + "stddev_ns": 2435302, + "avg_ts": 31.608307, + "stddev_ts": 0.004752, + "samples_ns": [ + 16200999580, + 16196312576, + 16197508866 + ], + "samples_ts": [ + 31.603, + 31.6121, + 31.6098 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 257 + }, + { + "timestamp_utc": "2025-12-08T22:59:02.202527+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:58:44Z\",\n \"avg_ns\": 1343358129,\n \"stddev_ns\": 1695310,\n \"avg_ts\": 381.134809,\n \"stddev_ts\": 0.480964,\n \"samples_ns\": [ 1345077643, 1341688105, 1343308639 ],\n \"samples_ts\": [ 380.647, 381.609, 381.148 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:58:50Z\",\n \"avg_ns\": 3978951545,\n \"stddev_ns\": 3710050,\n \"avg_ts\": 32.169297,\n \"stddev_ts\": 0.030001,\n \"samples_ns\": [ 3980460112, 3974725996, 3981668529 ],\n \"samples_ts\": [ 32.1571, 32.2035, 32.1473 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:58:44Z", + "avg_ns": 1343358129, + "stddev_ns": 1695310, + "avg_ts": 381.134809, + "stddev_ts": 0.480964, + "samples_ns": [ + 1345077643, + 1341688105, + 1343308639 + ], + "samples_ts": [ + 380.647, + 381.609, + 381.148 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T22:58:50Z", + "avg_ns": 3978951545, + "stddev_ns": 3710050, + "avg_ts": 32.169297, + "stddev_ts": 0.030001, + "samples_ns": [ + 3980460112, + 3974725996, + 3981668529 + ], + "samples_ts": [ + 32.1571, + 32.2035, + 32.1473 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 258 + }, + { + "timestamp_utc": "2025-12-08T22:59:57.038504+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:59:02Z\",\n \"avg_ns\": 1392977289,\n \"stddev_ns\": 847144,\n \"avg_ts\": 367.558129,\n \"stddev_ts\": 0.223250,\n \"samples_ns\": [ 1393935987, 1392333820, 1392662061 ],\n \"samples_ts\": [ 367.305, 367.728, 367.641 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:59:08Z\",\n \"avg_ns\": 16139478942,\n \"stddev_ns\": 21988395,\n \"avg_ts\": 31.723492,\n \"stddev_ts\": 0.043252,\n \"samples_ns\": [ 16114178059, 16153961379, 16150297390 ],\n \"samples_ts\": [ 31.7733, 31.695, 31.7022 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:59:02Z", + "avg_ns": 1392977289, + "stddev_ns": 847144, + "avg_ts": 367.558129, + "stddev_ts": 0.22325, + "samples_ns": [ + 1393935987, + 1392333820, + 1392662061 + ], + "samples_ts": [ + 367.305, + 367.728, + 367.641 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T22:59:08Z", + "avg_ns": 16139478942, + "stddev_ns": 21988395, + "avg_ts": 31.723492, + "stddev_ts": 0.043252, + "samples_ns": [ + 16114178059, + 16153961379, + 16150297390 + ], + "samples_ts": [ + 31.7733, + 31.695, + 31.7022 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 259 + }, + { + "timestamp_utc": "2025-12-08T23:00:11.026820+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:59:57Z\",\n \"avg_ns\": 319612619,\n \"stddev_ns\": 106955,\n \"avg_ts\": 400.484841,\n \"stddev_ts\": 0.130217,\n \"samples_ns\": [ 319605891, 319512223, 319719745 ],\n \"samples_ts\": [ 400.493, 400.611, 400.351 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T22:59:59Z\",\n \"avg_ns\": 3956706387,\n \"stddev_ns\": 6946431,\n \"avg_ts\": 32.350205,\n \"stddev_ts\": 0.056849,\n \"samples_ns\": [ 3960432764, 3960994178, 3948692220 ],\n \"samples_ts\": [ 32.3197, 32.3151, 32.4158 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T22:59:57Z", + "avg_ns": 319612619, + "stddev_ns": 106955, + "avg_ts": 400.484841, + "stddev_ts": 0.130217, + "samples_ns": [ + 319605891, + 319512223, + 319719745 + ], + "samples_ts": [ + 400.493, + 400.611, + 400.351 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T22:59:59Z", + "avg_ns": 3956706387, + "stddev_ns": 6946431, + "avg_ts": 32.350205, + "stddev_ts": 0.056849, + "samples_ns": [ + 3960432764, + 3960994178, + 3948692220 + ], + "samples_ts": [ + 32.3197, + 32.3151, + 32.4158 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 260 + }, + { + "timestamp_utc": "2025-12-08T23:01:01.272784+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:00:11Z\",\n \"avg_ns\": 321390182,\n \"stddev_ns\": 698103,\n \"avg_ts\": 398.271041,\n \"stddev_ts\": 0.864388,\n \"samples_ns\": [ 322163783, 320807157, 321199606 ],\n \"samples_ts\": [ 397.313, 398.994, 398.506 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:00:13Z\",\n \"avg_ns\": 16038815605,\n \"stddev_ns\": 14419526,\n \"avg_ts\": 31.922574,\n \"stddev_ts\": 0.028712,\n \"samples_ns\": [ 16022235224, 16048413034, 16045798559 ],\n \"samples_ts\": [ 31.9556, 31.9035, 31.9087 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:00:11Z", + "avg_ns": 321390182, + "stddev_ns": 698103, + "avg_ts": 398.271041, + "stddev_ts": 0.864388, + "samples_ns": [ + 322163783, + 320807157, + 321199606 + ], + "samples_ts": [ + 397.313, + 398.994, + 398.506 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T23:00:13Z", + "avg_ns": 16038815605, + "stddev_ns": 14419526, + "avg_ts": 31.922574, + "stddev_ts": 0.028712, + "samples_ns": [ + 16022235224, + 16048413034, + 16045798559 + ], + "samples_ts": [ + 31.9556, + 31.9035, + 31.9087 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 261 + }, + { + "timestamp_utc": "2025-12-08T23:01:21.744220+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:01:02Z\",\n \"avg_ns\": 1909483385,\n \"stddev_ns\": 3184757,\n \"avg_ts\": 268.135849,\n \"stddev_ts\": 0.446819,\n \"samples_ns\": [ 1906902714, 1908505691, 1913041752 ],\n \"samples_ts\": [ 268.498, 268.273, 267.637 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:01:09Z\",\n \"avg_ns\": 3994009762,\n \"stddev_ns\": 3057800,\n \"avg_ts\": 32.048006,\n \"stddev_ts\": 0.024546,\n \"samples_ns\": [ 3996152688, 3995368535, 3990508063 ],\n \"samples_ts\": [ 32.0308, 32.0371, 32.0761 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:01:02Z", + "avg_ns": 1909483385, + "stddev_ns": 3184757, + "avg_ts": 268.135849, + "stddev_ts": 0.446819, + "samples_ns": [ + 1906902714, + 1908505691, + 1913041752 + ], + "samples_ts": [ + 268.498, + 268.273, + 267.637 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T23:01:09Z", + "avg_ns": 3994009762, + "stddev_ns": 3057800, + "avg_ts": 32.048006, + "stddev_ts": 0.024546, + "samples_ns": [ + 3996152688, + 3995368535, + 3990508063 + ], + "samples_ts": [ + 32.0308, + 32.0371, + 32.0761 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 262 + }, + { + "timestamp_utc": "2025-12-08T23:02:17.814922+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:01:22Z\",\n \"avg_ns\": 1731855598,\n \"stddev_ns\": 2555545,\n \"avg_ts\": 295.637087,\n \"stddev_ts\": 0.436363,\n \"samples_ns\": [ 1729157222, 1734239140, 1732170432 ],\n \"samples_ts\": [ 296.098, 295.23, 295.583 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:01:29Z\",\n \"avg_ns\": 16088238846,\n \"stddev_ns\": 27432854,\n \"avg_ts\": 31.824552,\n \"stddev_ts\": 0.054273,\n \"samples_ns\": [ 16060046072, 16114841429, 16089829039 ],\n \"samples_ts\": [ 31.8804, 31.772, 31.8213 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:01:22Z", + "avg_ns": 1731855598, + "stddev_ns": 2555545, + "avg_ts": 295.637087, + "stddev_ts": 0.436363, + "samples_ns": [ + 1729157222, + 1734239140, + 1732170432 + ], + "samples_ts": [ + 296.098, + 295.23, + 295.583 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T23:01:29Z", + "avg_ns": 16088238846, + "stddev_ns": 27432854, + "avg_ts": 31.824552, + "stddev_ts": 0.054273, + "samples_ns": [ + 16060046072, + 16114841429, + 16089829039 + ], + "samples_ts": [ + 31.8804, + 31.772, + 31.8213 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 263 + }, + { + "timestamp_utc": "2025-12-08T23:02:31.928742+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:02:18Z\",\n \"avg_ns\": 323049308,\n \"stddev_ns\": 860711,\n \"avg_ts\": 396.226217,\n \"stddev_ts\": 1.054898,\n \"samples_ns\": [ 323981021, 322283856, 322883047 ],\n \"samples_ts\": [ 395.085, 397.165, 396.428 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:02:19Z\",\n \"avg_ns\": 3990973348,\n \"stddev_ns\": 2561115,\n \"avg_ts\": 32.072385,\n \"stddev_ts\": 0.020569,\n \"samples_ns\": [ 3993883076, 3989065237, 3989971732 ],\n \"samples_ts\": [ 32.049, 32.0877, 32.0804 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:02:18Z", + "avg_ns": 323049308, + "stddev_ns": 860711, + "avg_ts": 396.226217, + "stddev_ts": 1.054898, + "samples_ns": [ + 323981021, + 322283856, + 322883047 + ], + "samples_ts": [ + 395.085, + 397.165, + 396.428 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T23:02:19Z", + "avg_ns": 3990973348, + "stddev_ns": 2561115, + "avg_ts": 32.072385, + "stddev_ts": 0.020569, + "samples_ns": [ + 3993883076, + 3989065237, + 3989971732 + ], + "samples_ts": [ + 32.049, + 32.0877, + 32.0804 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 264 + }, + { + "timestamp_utc": "2025-12-08T23:03:22.919008+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:02:32Z\",\n \"avg_ns\": 327900970,\n \"stddev_ns\": 565469,\n \"avg_ts\": 390.362533,\n \"stddev_ts\": 0.672769,\n \"samples_ns\": [ 328414747, 327992114, 327296051 ],\n \"samples_ts\": [ 389.751, 390.253, 391.083 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:02:33Z\",\n \"avg_ns\": 16270845854,\n \"stddev_ns\": 38854231,\n \"avg_ts\": 31.467445,\n \"stddev_ts\": 0.075044,\n \"samples_ns\": [ 16244691498, 16252354104, 16315491961 ],\n \"samples_ts\": [ 31.518, 31.5031, 31.3812 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:02:32Z", + "avg_ns": 327900970, + "stddev_ns": 565469, + "avg_ts": 390.362533, + "stddev_ts": 0.672769, + "samples_ns": [ + 328414747, + 327992114, + 327296051 + ], + "samples_ts": [ + 389.751, + 390.253, + 391.083 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T23:02:33Z", + "avg_ns": 16270845854, + "stddev_ns": 38854231, + "avg_ts": 31.467445, + "stddev_ts": 0.075044, + "samples_ns": [ + 16244691498, + 16252354104, + 16315491961 + ], + "samples_ts": [ + 31.518, + 31.5031, + 31.3812 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 265 + }, + { + "timestamp_utc": "2025-12-08T23:03:40.938621+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:03:23Z\",\n \"avg_ns\": 1308295441,\n \"stddev_ns\": 855707,\n \"avg_ts\": 391.349026,\n \"stddev_ts\": 0.255952,\n \"samples_ns\": [ 1307466298, 1309175444, 1308244581 ],\n \"samples_ts\": [ 391.597, 391.086, 391.364 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:03:28Z\",\n \"avg_ns\": 3972387086,\n \"stddev_ns\": 6575413,\n \"avg_ts\": 32.222498,\n \"stddev_ts\": 0.053287,\n \"samples_ns\": [ 3979976495, 3968784000, 3968400763 ],\n \"samples_ts\": [ 32.161, 32.2517, 32.2548 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:03:23Z", + "avg_ns": 1308295441, + "stddev_ns": 855707, + "avg_ts": 391.349026, + "stddev_ts": 0.255952, + "samples_ns": [ + 1307466298, + 1309175444, + 1308244581 + ], + "samples_ts": [ + 391.597, + 391.086, + 391.364 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T23:03:28Z", + "avg_ns": 3972387086, + "stddev_ns": 6575413, + "avg_ts": 32.222498, + "stddev_ts": 0.053287, + "samples_ns": [ + 3979976495, + 3968784000, + 3968400763 + ], + "samples_ts": [ + 32.161, + 32.2517, + 32.2548 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 266 + }, + { + "timestamp_utc": "2025-12-08T23:04:35.358938+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:03:41Z\",\n \"avg_ns\": 1313373629,\n \"stddev_ns\": 1302370,\n \"avg_ts\": 389.836010,\n \"stddev_ts\": 0.386764,\n \"samples_ns\": [ 1313895234, 1311891305, 1314334348 ],\n \"samples_ts\": [ 389.681, 390.276, 389.551 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:03:46Z\",\n \"avg_ns\": 16100926796,\n \"stddev_ns\": 607368,\n \"avg_ts\": 31.799412,\n \"stddev_ts\": 0.001200,\n \"samples_ns\": [ 16100229869, 16101343199, 16101207320 ],\n \"samples_ts\": [ 31.8008, 31.7986, 31.7989 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:03:41Z", + "avg_ns": 1313373629, + "stddev_ns": 1302370, + "avg_ts": 389.83601, + "stddev_ts": 0.386764, + "samples_ns": [ + 1313895234, + 1311891305, + 1314334348 + ], + "samples_ts": [ + 389.681, + 390.276, + 389.551 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T23:03:46Z", + "avg_ns": 16100926796, + "stddev_ns": 607368, + "avg_ts": 31.799412, + "stddev_ts": 0.0012, + "samples_ns": [ + 16100229869, + 16101343199, + 16101207320 + ], + "samples_ts": [ + 31.8008, + 31.7986, + 31.7989 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 267 + }, + { + "timestamp_utc": "2025-12-08T23:04:49.426868+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:04:36Z\",\n \"avg_ns\": 320361704,\n \"stddev_ns\": 677039,\n \"avg_ts\": 399.549568,\n \"stddev_ts\": 0.844294,\n \"samples_ns\": [ 319629963, 320964550, 320490601 ],\n \"samples_ts\": [ 400.463, 398.798, 399.388 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:04:37Z\",\n \"avg_ns\": 3963833035,\n \"stddev_ns\": 11960706,\n \"avg_ts\": 32.292172,\n \"stddev_ts\": 0.097603,\n \"samples_ns\": [ 3950060914, 3969826305, 3971611888 ],\n \"samples_ts\": [ 32.4046, 32.2432, 32.2287 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:04:36Z", + "avg_ns": 320361704, + "stddev_ns": 677039, + "avg_ts": 399.549568, + "stddev_ts": 0.844294, + "samples_ns": [ + 319629963, + 320964550, + 320490601 + ], + "samples_ts": [ + 400.463, + 398.798, + 399.388 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T23:04:37Z", + "avg_ns": 3963833035, + "stddev_ns": 11960706, + "avg_ts": 32.292172, + "stddev_ts": 0.097603, + "samples_ns": [ + 3950060914, + 3969826305, + 3971611888 + ], + "samples_ts": [ + 32.4046, + 32.2432, + 32.2287 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 268 + }, + { + "timestamp_utc": "2025-12-08T23:05:39.699511+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:04:50Z\",\n \"avg_ns\": 322349165,\n \"stddev_ns\": 1042642,\n \"avg_ts\": 397.087710,\n \"stddev_ts\": 1.283098,\n \"samples_ns\": [ 322158077, 321415440, 323473979 ],\n \"samples_ts\": [ 397.32, 398.238, 395.704 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:04:51Z\",\n \"avg_ns\": 16045691285,\n \"stddev_ns\": 25553461,\n \"avg_ts\": 31.908932,\n \"stddev_ts\": 0.050820,\n \"samples_ns\": [ 16070742217, 16046667726, 16019663913 ],\n \"samples_ts\": [ 31.8591, 31.9069, 31.9607 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:04:50Z", + "avg_ns": 322349165, + "stddev_ns": 1042642, + "avg_ts": 397.08771, + "stddev_ts": 1.283098, + "samples_ns": [ + 322158077, + 321415440, + 323473979 + ], + "samples_ts": [ + 397.32, + 398.238, + 395.704 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T23:04:51Z", + "avg_ns": 16045691285, + "stddev_ns": 25553461, + "avg_ts": 31.908932, + "stddev_ts": 0.05082, + "samples_ns": [ + 16070742217, + 16046667726, + 16019663913 + ], + "samples_ts": [ + 31.8591, + 31.9069, + 31.9607 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 269 + }, + { + "timestamp_utc": "2025-12-08T23:05:58.122127+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:05:40Z\",\n \"avg_ns\": 1397720382,\n \"stddev_ns\": 3923070,\n \"avg_ts\": 366.312671,\n \"stddev_ts\": 1.027446,\n \"samples_ns\": [ 1397102945, 1394142822, 1401915380 ],\n \"samples_ts\": [ 366.473, 367.251, 365.215 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:05:46Z\",\n \"avg_ns\": 3988161917,\n \"stddev_ns\": 9441470,\n \"avg_ts\": 32.095106,\n \"stddev_ts\": 0.075965,\n \"samples_ns\": [ 3987657662, 3997845199, 3978982891 ],\n \"samples_ts\": [ 32.099, 32.0172, 32.169 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:05:40Z", + "avg_ns": 1397720382, + "stddev_ns": 3923070, + "avg_ts": 366.312671, + "stddev_ts": 1.027446, + "samples_ns": [ + 1397102945, + 1394142822, + 1401915380 + ], + "samples_ts": [ + 366.473, + 367.251, + 365.215 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T23:05:46Z", + "avg_ns": 3988161917, + "stddev_ns": 9441470, + "avg_ts": 32.095106, + "stddev_ts": 0.075965, + "samples_ns": [ + 3987657662, + 3997845199, + 3978982891 + ], + "samples_ts": [ + 32.099, + 32.0172, + 32.169 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 270 + }, + { + "timestamp_utc": "2025-12-08T23:06:52.734718+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:05:58Z\",\n \"avg_ns\": 1393827465,\n \"stddev_ns\": 2916754,\n \"avg_ts\": 367.334914,\n \"stddev_ts\": 0.767704,\n \"samples_ns\": [ 1392093807, 1392193918, 1397194671 ],\n \"samples_ts\": [ 367.791, 367.765, 366.449 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:06:04Z\",\n \"avg_ns\": 16061833441,\n \"stddev_ns\": 10466226,\n \"avg_ts\": 31.876818,\n \"stddev_ts\": 0.020774,\n \"samples_ns\": [ 16050890875, 16062862080, 16071747368 ],\n \"samples_ts\": [ 31.8985, 31.8748, 31.8571 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:05:58Z", + "avg_ns": 1393827465, + "stddev_ns": 2916754, + "avg_ts": 367.334914, + "stddev_ts": 0.767704, + "samples_ns": [ + 1392093807, + 1392193918, + 1397194671 + ], + "samples_ts": [ + 367.791, + 367.765, + 366.449 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T23:06:04Z", + "avg_ns": 16061833441, + "stddev_ns": 10466226, + "avg_ts": 31.876818, + "stddev_ts": 0.020774, + "samples_ns": [ + 16050890875, + 16062862080, + 16071747368 + ], + "samples_ts": [ + 31.8985, + 31.8748, + 31.8571 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 271 + }, + { + "timestamp_utc": "2025-12-08T23:07:06.763597+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:06:53Z\",\n \"avg_ns\": 321608858,\n \"stddev_ns\": 1946203,\n \"avg_ts\": 398.008677,\n \"stddev_ts\": 2.401649,\n \"samples_ns\": [ 320898117, 323810534, 320117923 ],\n \"samples_ts\": [ 398.88, 395.293, 399.853 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:06:54Z\",\n \"avg_ns\": 3966520755,\n \"stddev_ns\": 7332516,\n \"avg_ts\": 32.270168,\n \"stddev_ts\": 0.059620,\n \"samples_ns\": [ 3974491052, 3965008988, 3960062226 ],\n \"samples_ts\": [ 32.2054, 32.2824, 32.3227 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:06:53Z", + "avg_ns": 321608858, + "stddev_ns": 1946203, + "avg_ts": 398.008677, + "stddev_ts": 2.401649, + "samples_ns": [ + 320898117, + 323810534, + 320117923 + ], + "samples_ts": [ + 398.88, + 395.293, + 399.853 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T23:06:54Z", + "avg_ns": 3966520755, + "stddev_ns": 7332516, + "avg_ts": 32.270168, + "stddev_ts": 0.05962, + "samples_ns": [ + 3974491052, + 3965008988, + 3960062226 + ], + "samples_ts": [ + 32.2054, + 32.2824, + 32.3227 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 272 + }, + { + "timestamp_utc": "2025-12-08T23:07:57.271664+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:07:07Z\",\n \"avg_ns\": 322249807,\n \"stddev_ns\": 3606525,\n \"avg_ts\": 397.240333,\n \"stddev_ts\": 4.417271,\n \"samples_ns\": [ 320206664, 326414021, 320128736 ],\n \"samples_ts\": [ 399.742, 392.14, 399.839 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:07:08Z\",\n \"avg_ns\": 16126129717,\n \"stddev_ns\": 13194954,\n \"avg_ts\": 31.749728,\n \"stddev_ts\": 0.025970,\n \"samples_ns\": [ 16140629982, 16122928962, 16114830208 ],\n \"samples_ts\": [ 31.7212, 31.756, 31.772 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:07:07Z", + "avg_ns": 322249807, + "stddev_ns": 3606525, + "avg_ts": 397.240333, + "stddev_ts": 4.417271, + "samples_ns": [ + 320206664, + 326414021, + 320128736 + ], + "samples_ts": [ + 399.742, + 392.14, + 399.839 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T23:07:08Z", + "avg_ns": 16126129717, + "stddev_ns": 13194954, + "avg_ts": 31.749728, + "stddev_ts": 0.02597, + "samples_ns": [ + 16140629982, + 16122928962, + 16114830208 + ], + "samples_ts": [ + 31.7212, + 31.756, + 31.772 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 273 + }, + { + "timestamp_utc": "2025-12-08T23:08:17.756294+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:07:58Z\",\n \"avg_ns\": 1897271225,\n \"stddev_ns\": 1985578,\n \"avg_ts\": 269.861455,\n \"stddev_ts\": 0.282350,\n \"samples_ns\": [ 1895473733, 1896937376, 1899402566 ],\n \"samples_ts\": [ 270.117, 269.909, 269.558 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:08:05Z\",\n \"avg_ns\": 4013522974,\n \"stddev_ns\": 17529452,\n \"avg_ts\": 31.892586,\n \"stddev_ts\": 0.139026,\n \"samples_ns\": [ 4033204207, 4007776319, 3999588398 ],\n \"samples_ts\": [ 31.7366, 31.9379, 32.0033 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:07:58Z", + "avg_ns": 1897271225, + "stddev_ns": 1985578, + "avg_ts": 269.861455, + "stddev_ts": 0.28235, + "samples_ns": [ + 1895473733, + 1896937376, + 1899402566 + ], + "samples_ts": [ + 270.117, + 269.909, + 269.558 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T23:08:05Z", + "avg_ns": 4013522974, + "stddev_ns": 17529452, + "avg_ts": 31.892586, + "stddev_ts": 0.139026, + "samples_ns": [ + 4033204207, + 4007776319, + 3999588398 + ], + "samples_ts": [ + 31.7366, + 31.9379, + 32.0033 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 274 + }, + { + "timestamp_utc": "2025-12-08T23:09:13.687814+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:08:18Z\",\n \"avg_ns\": 1675201527,\n \"stddev_ns\": 8317234,\n \"avg_ts\": 305.639882,\n \"stddev_ts\": 1.514315,\n \"samples_ns\": [ 1668472505, 1672631823, 1684500254 ],\n \"samples_ts\": [ 306.868, 306.104, 303.948 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:08:25Z\",\n \"avg_ns\": 16118489465,\n \"stddev_ns\": 27384422,\n \"avg_ts\": 31.764824,\n \"stddev_ts\": 0.053995,\n \"samples_ns\": [ 16088642125, 16124371078, 16142455192 ],\n \"samples_ts\": [ 31.8237, 31.7532, 31.7176 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:08:18Z", + "avg_ns": 1675201527, + "stddev_ns": 8317234, + "avg_ts": 305.639882, + "stddev_ts": 1.514315, + "samples_ns": [ + 1668472505, + 1672631823, + 1684500254 + ], + "samples_ts": [ + 306.868, + 306.104, + 303.948 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T23:08:25Z", + "avg_ns": 16118489465, + "stddev_ns": 27384422, + "avg_ts": 31.764824, + "stddev_ts": 0.053995, + "samples_ns": [ + 16088642125, + 16124371078, + 16142455192 + ], + "samples_ts": [ + 31.8237, + 31.7532, + 31.7176 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 275 + }, + { + "timestamp_utc": "2025-12-08T23:09:27.753126+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:09:14Z\",\n \"avg_ns\": 320354919,\n \"stddev_ns\": 668742,\n \"avg_ts\": 399.558002,\n \"stddev_ts\": 0.833095,\n \"samples_ns\": [ 319925532, 321125433, 320013792 ],\n \"samples_ts\": [ 400.093, 398.598, 399.983 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:09:15Z\",\n \"avg_ns\": 3979894863,\n \"stddev_ns\": 2925416,\n \"avg_ts\": 32.161665,\n \"stddev_ts\": 0.023625,\n \"samples_ns\": [ 3983252563, 3977902208, 3978529819 ],\n \"samples_ts\": [ 32.1345, 32.1778, 32.1727 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:09:14Z", + "avg_ns": 320354919, + "stddev_ns": 668742, + "avg_ts": 399.558002, + "stddev_ts": 0.833095, + "samples_ns": [ + 319925532, + 321125433, + 320013792 + ], + "samples_ts": [ + 400.093, + 398.598, + 399.983 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T23:09:15Z", + "avg_ns": 3979894863, + "stddev_ns": 2925416, + "avg_ts": 32.161665, + "stddev_ts": 0.023625, + "samples_ns": [ + 3983252563, + 3977902208, + 3978529819 + ], + "samples_ts": [ + 32.1345, + 32.1778, + 32.1727 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 276 + }, + { + "timestamp_utc": "2025-12-08T23:10:17.961909+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:09:28Z\",\n \"avg_ns\": 320718810,\n \"stddev_ns\": 403069,\n \"avg_ts\": 399.103921,\n \"stddev_ts\": 0.501511,\n \"samples_ns\": [ 320331720, 321136154, 320688556 ],\n \"samples_ts\": [ 399.586, 398.585, 399.141 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:09:29Z\",\n \"avg_ns\": 16027842089,\n \"stddev_ns\": 23346801,\n \"avg_ts\": 31.944458,\n \"stddev_ts\": 0.046505,\n \"samples_ns\": [ 16053700195, 16021513026, 16008313048 ],\n \"samples_ts\": [ 31.893, 31.957, 31.9834 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:09:28Z", + "avg_ns": 320718810, + "stddev_ns": 403069, + "avg_ts": 399.103921, + "stddev_ts": 0.501511, + "samples_ns": [ + 320331720, + 321136154, + 320688556 + ], + "samples_ts": [ + 399.586, + 398.585, + 399.141 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T23:09:29Z", + "avg_ns": 16027842089, + "stddev_ns": 23346801, + "avg_ts": 31.944458, + "stddev_ts": 0.046505, + "samples_ns": [ + 16053700195, + 16021513026, + 16008313048 + ], + "samples_ts": [ + 31.893, + 31.957, + 31.9834 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 277 + }, + { + "timestamp_utc": "2025-12-08T23:10:36.281917+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:10:18Z\",\n \"avg_ns\": 1341024874,\n \"stddev_ns\": 1105066,\n \"avg_ts\": 381.797714,\n \"stddev_ts\": 0.314144,\n \"samples_ns\": [ 1342280400, 1340587457, 1340206767 ],\n \"samples_ts\": [ 381.44, 381.922, 382.031 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:10:24Z\",\n \"avg_ns\": 4036634555,\n \"stddev_ns\": 6198048,\n \"avg_ts\": 31.709633,\n \"stddev_ts\": 0.048726,\n \"samples_ns\": [ 4040457531, 4029484139, 4039961997 ],\n \"samples_ts\": [ 31.6796, 31.7659, 31.6835 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:10:18Z", + "avg_ns": 1341024874, + "stddev_ns": 1105066, + "avg_ts": 381.797714, + "stddev_ts": 0.314144, + "samples_ns": [ + 1342280400, + 1340587457, + 1340206767 + ], + "samples_ts": [ + 381.44, + 381.922, + 382.031 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T23:10:24Z", + "avg_ns": 4036634555, + "stddev_ns": 6198048, + "avg_ts": 31.709633, + "stddev_ts": 0.048726, + "samples_ns": [ + 4040457531, + 4029484139, + 4039961997 + ], + "samples_ts": [ + 31.6796, + 31.7659, + 31.6835 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 278 + }, + { + "timestamp_utc": "2025-12-08T23:11:30.586596+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:10:37Z\",\n \"avg_ns\": 1313563824,\n \"stddev_ns\": 1059505,\n \"avg_ts\": 389.779478,\n \"stddev_ts\": 0.314148,\n \"samples_ns\": [ 1313393459, 1312600451, 1314697563 ],\n \"samples_ts\": [ 389.83, 390.065, 389.443 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:10:42Z\",\n \"avg_ns\": 16066921650,\n \"stddev_ns\": 6623880,\n \"avg_ts\": 31.866718,\n \"stddev_ts\": 0.013139,\n \"samples_ns\": [ 16059897993, 16073055876, 16067811081 ],\n \"samples_ts\": [ 31.8807, 31.8546, 31.865 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:10:37Z", + "avg_ns": 1313563824, + "stddev_ns": 1059505, + "avg_ts": 389.779478, + "stddev_ts": 0.314148, + "samples_ns": [ + 1313393459, + 1312600451, + 1314697563 + ], + "samples_ts": [ + 389.83, + 390.065, + 389.443 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T23:10:42Z", + "avg_ns": 16066921650, + "stddev_ns": 6623880, + "avg_ts": 31.866718, + "stddev_ts": 0.013139, + "samples_ns": [ + 16059897993, + 16073055876, + 16067811081 + ], + "samples_ts": [ + 31.8807, + 31.8546, + 31.865 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 279 + }, + { + "timestamp_utc": "2025-12-08T23:11:44.614935+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:11:31Z\",\n \"avg_ns\": 319739596,\n \"stddev_ns\": 883230,\n \"avg_ts\": 400.327805,\n \"stddev_ts\": 1.105040,\n \"samples_ns\": [ 319578810, 320692174, 318947804 ],\n \"samples_ts\": [ 400.527, 399.137, 401.32 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:11:32Z\",\n \"avg_ns\": 3966315706,\n \"stddev_ns\": 7658536,\n \"avg_ts\": 32.271843,\n \"stddev_ts\": 0.062274,\n \"samples_ns\": [ 3964557607, 3959689091, 3974700420 ],\n \"samples_ts\": [ 32.2861, 32.3258, 32.2037 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:11:31Z", + "avg_ns": 319739596, + "stddev_ns": 883230, + "avg_ts": 400.327805, + "stddev_ts": 1.10504, + "samples_ns": [ + 319578810, + 320692174, + 318947804 + ], + "samples_ts": [ + 400.527, + 399.137, + 401.32 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T23:11:32Z", + "avg_ns": 3966315706, + "stddev_ns": 7658536, + "avg_ts": 32.271843, + "stddev_ts": 0.062274, + "samples_ns": [ + 3964557607, + 3959689091, + 3974700420 + ], + "samples_ts": [ + 32.2861, + 32.3258, + 32.2037 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 280 + }, + { + "timestamp_utc": "2025-12-08T23:12:35.321012+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:11:45Z\",\n \"avg_ns\": 321662014,\n \"stddev_ns\": 4608755,\n \"avg_ts\": 397.987254,\n \"stddev_ts\": 5.657269,\n \"samples_ns\": [ 318593947, 326961752, 319430344 ],\n \"samples_ts\": [ 401.765, 391.483, 400.713 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:11:46Z\",\n \"avg_ns\": 16189232215,\n \"stddev_ns\": 9628203,\n \"avg_ts\": 31.625967,\n \"stddev_ts\": 0.018810,\n \"samples_ns\": [ 16197827061, 16191040739, 16178828846 ],\n \"samples_ts\": [ 31.6092, 31.6224, 31.6463 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:11:45Z", + "avg_ns": 321662014, + "stddev_ns": 4608755, + "avg_ts": 397.987254, + "stddev_ts": 5.657269, + "samples_ns": [ + 318593947, + 326961752, + 319430344 + ], + "samples_ts": [ + 401.765, + 391.483, + 400.713 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T23:11:46Z", + "avg_ns": 16189232215, + "stddev_ns": 9628203, + "avg_ts": 31.625967, + "stddev_ts": 0.01881, + "samples_ns": [ + 16197827061, + 16191040739, + 16178828846 + ], + "samples_ts": [ + 31.6092, + 31.6224, + 31.6463 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 281 + }, + { + "timestamp_utc": "2025-12-08T23:12:53.722469+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:12:36Z\",\n \"avg_ns\": 1395446756,\n \"stddev_ns\": 5291732,\n \"avg_ts\": 366.911100,\n \"stddev_ts\": 1.390158,\n \"samples_ns\": [ 1390612617, 1401100238, 1394627414 ],\n \"samples_ts\": [ 368.183, 365.427, 367.123 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:12:41Z\",\n \"avg_ns\": 3990262552,\n \"stddev_ns\": 10326195,\n \"avg_ts\": 32.078233,\n \"stddev_ts\": 0.083136,\n \"samples_ns\": [ 3996376733, 3978340423, 3996070501 ],\n \"samples_ts\": [ 32.029, 32.1742, 32.0315 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:12:36Z", + "avg_ns": 1395446756, + "stddev_ns": 5291732, + "avg_ts": 366.9111, + "stddev_ts": 1.390158, + "samples_ns": [ + 1390612617, + 1401100238, + 1394627414 + ], + "samples_ts": [ + 368.183, + 365.427, + 367.123 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T23:12:41Z", + "avg_ns": 3990262552, + "stddev_ns": 10326195, + "avg_ts": 32.078233, + "stddev_ts": 0.083136, + "samples_ns": [ + 3996376733, + 3978340423, + 3996070501 + ], + "samples_ts": [ + 32.029, + 32.1742, + 32.0315 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 282 + }, + { + "timestamp_utc": "2025-12-08T23:13:48.912715+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:12:54Z\",\n \"avg_ns\": 1473079808,\n \"stddev_ns\": 2266331,\n \"avg_ts\": 347.571669,\n \"stddev_ts\": 0.534702,\n \"samples_ns\": [ 1470848842, 1473010659, 1475379923 ],\n \"samples_ts\": [ 348.098, 347.587, 347.029 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:13:00Z\",\n \"avg_ns\": 16131952614,\n \"stddev_ns\": 9995396,\n \"avg_ts\": 31.738262,\n \"stddev_ts\": 0.019658,\n \"samples_ns\": [ 16123203948, 16142843949, 16129809947 ],\n \"samples_ts\": [ 31.7555, 31.7168, 31.7425 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:12:54Z", + "avg_ns": 1473079808, + "stddev_ns": 2266331, + "avg_ts": 347.571669, + "stddev_ts": 0.534702, + "samples_ns": [ + 1470848842, + 1473010659, + 1475379923 + ], + "samples_ts": [ + 348.098, + 347.587, + 347.029 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T23:13:00Z", + "avg_ns": 16131952614, + "stddev_ns": 9995396, + "avg_ts": 31.738262, + "stddev_ts": 0.019658, + "samples_ns": [ + 16123203948, + 16142843949, + 16129809947 + ], + "samples_ts": [ + 31.7555, + 31.7168, + 31.7425 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 283 + }, + { + "timestamp_utc": "2025-12-08T23:14:02.905128+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:13:49Z\",\n \"avg_ns\": 317954418,\n \"stddev_ns\": 767709,\n \"avg_ts\": 402.574990,\n \"stddev_ts\": 0.971601,\n \"samples_ns\": [ 318711291, 317177108, 317974857 ],\n \"samples_ts\": [ 401.617, 403.56, 402.548 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:13:50Z\",\n \"avg_ns\": 3956876974,\n \"stddev_ns\": 261019,\n \"avg_ts\": 32.348744,\n \"stddev_ts\": 0.002006,\n \"samples_ns\": [ 3956995320, 3956594837, 3957040767 ],\n \"samples_ts\": [ 32.3478, 32.3511, 32.3474 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:13:49Z", + "avg_ns": 317954418, + "stddev_ns": 767709, + "avg_ts": 402.57499, + "stddev_ts": 0.971601, + "samples_ns": [ + 318711291, + 317177108, + 317974857 + ], + "samples_ts": [ + 401.617, + 403.56, + 402.548 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T23:13:50Z", + "avg_ns": 3956876974, + "stddev_ns": 261019, + "avg_ts": 32.348744, + "stddev_ts": 0.002006, + "samples_ns": [ + 3956995320, + 3956594837, + 3957040767 + ], + "samples_ts": [ + 32.3478, + 32.3511, + 32.3474 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 284 + }, + { + "timestamp_utc": "2025-12-08T23:14:54.293384+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:14:03Z\",\n \"avg_ns\": 326578880,\n \"stddev_ns\": 599861,\n \"avg_ts\": 391.942944,\n \"stddev_ts\": 0.719673,\n \"samples_ns\": [ 327213616, 326021373, 326501651 ],\n \"samples_ts\": [ 391.182, 392.612, 392.035 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:14:04Z\",\n \"avg_ns\": 16398435288,\n \"stddev_ns\": 31029175,\n \"avg_ts\": 31.222566,\n \"stddev_ts\": 0.059016,\n \"samples_ns\": [ 16434156357, 16378167006, 16382982502 ],\n \"samples_ts\": [ 31.1546, 31.2611, 31.2519 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:14:03Z", + "avg_ns": 326578880, + "stddev_ns": 599861, + "avg_ts": 391.942944, + "stddev_ts": 0.719673, + "samples_ns": [ + 327213616, + 326021373, + 326501651 + ], + "samples_ts": [ + 391.182, + 392.612, + 392.035 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T23:14:04Z", + "avg_ns": 16398435288, + "stddev_ns": 31029175, + "avg_ts": 31.222566, + "stddev_ts": 0.059016, + "samples_ns": [ + 16434156357, + 16378167006, + 16382982502 + ], + "samples_ts": [ + 31.1546, + 31.2611, + 31.2519 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 285 + }, + { + "timestamp_utc": "2025-12-08T23:15:14.644652+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:14:55Z\",\n \"avg_ns\": 1901672799,\n \"stddev_ns\": 5163373,\n \"avg_ts\": 269.237964,\n \"stddev_ts\": 0.729845,\n \"samples_ns\": [ 1898433440, 1907626823, 1898958136 ],\n \"samples_ts\": [ 269.696, 268.396, 269.622 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:15:02Z\",\n \"avg_ns\": 3965582436,\n \"stddev_ns\": 8122820,\n \"avg_ts\": 32.277821,\n \"stddev_ts\": 0.066192,\n \"samples_ns\": [ 3970449590, 3956205564, 3970092155 ],\n \"samples_ts\": [ 32.2382, 32.3542, 32.2411 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:14:55Z", + "avg_ns": 1901672799, + "stddev_ns": 5163373, + "avg_ts": 269.237964, + "stddev_ts": 0.729845, + "samples_ns": [ + 1898433440, + 1907626823, + 1898958136 + ], + "samples_ts": [ + 269.696, + 268.396, + 269.622 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T23:15:02Z", + "avg_ns": 3965582436, + "stddev_ns": 8122820, + "avg_ts": 32.277821, + "stddev_ts": 0.066192, + "samples_ns": [ + 3970449590, + 3956205564, + 3970092155 + ], + "samples_ts": [ + 32.2382, + 32.3542, + 32.2411 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 286 + }, + { + "timestamp_utc": "2025-12-08T23:16:10.814258+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:15:15Z\",\n \"avg_ns\": 1724102571,\n \"stddev_ns\": 1423761,\n \"avg_ts\": 296.966225,\n \"stddev_ts\": 0.245316,\n \"samples_ns\": [ 1724523033, 1722515930, 1725268750 ],\n \"samples_ts\": [ 296.894, 297.24, 296.765 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 270M Q8_0\",\n \"model_size\": 285018624,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:15:22Z\",\n \"avg_ns\": 16140432115,\n \"stddev_ns\": 15402455,\n \"avg_ts\": 31.721599,\n \"stddev_ts\": 0.030286,\n \"samples_ns\": [ 16148295719, 16150314298, 16122686330 ],\n \"samples_ts\": [ 31.7061, 31.7022, 31.7565 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:15:15Z", + "avg_ns": 1724102571, + "stddev_ns": 1423761, + "avg_ts": 296.966225, + "stddev_ts": 0.245316, + "samples_ns": [ + 1724523033, + 1722515930, + 1725268750 + ], + "samples_ts": [ + 296.894, + 297.24, + 296.765 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_type": "gemma3 270M Q8_0", + "model_size": 285018624, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T23:15:22Z", + "avg_ns": 16140432115, + "stddev_ns": 15402455, + "avg_ts": 31.721599, + "stddev_ts": 0.030286, + "samples_ns": [ + 16148295719, + 16150314298, + 16122686330 + ], + "samples_ts": [ + 31.7061, + 31.7022, + 31.7565 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 287 + }, + { + "timestamp_utc": "2025-12-08T23:16:29.892198+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:16:14Z\",\n \"avg_ns\": 1120158031,\n \"stddev_ns\": 12212326,\n \"avg_ts\": 114.278590,\n \"stddev_ts\": 1.238100,\n \"samples_ns\": [ 1134259485, 1113127975, 1113086635 ],\n \"samples_ts\": [ 112.849, 114.991, 114.996 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:16:18Z\",\n \"avg_ns\": 3692859261,\n \"stddev_ns\": 1638707,\n \"avg_ts\": 34.661493,\n \"stddev_ts\": 0.015360,\n \"samples_ns\": [ 3692871824, 3691216564, 3694489397 ],\n \"samples_ts\": [ 34.6614, 34.6769, 34.6462 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:16:14Z", + "avg_ns": 1120158031, + "stddev_ns": 12212326, + "avg_ts": 114.27859, + "stddev_ts": 1.2381, + "samples_ns": [ + 1134259485, + 1113127975, + 1113086635 + ], + "samples_ts": [ + 112.849, + 114.991, + 114.996 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T23:16:18Z", + "avg_ns": 3692859261, + "stddev_ns": 1638707, + "avg_ts": 34.661493, + "stddev_ts": 0.01536, + "samples_ns": [ + 3692871824, + 3691216564, + 3694489397 + ], + "samples_ts": [ + 34.6614, + 34.6769, + 34.6462 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 288 + }, + { + "timestamp_utc": "2025-12-08T23:17:20.949120+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:16:30Z\",\n \"avg_ns\": 1117132183,\n \"stddev_ns\": 9295003,\n \"avg_ts\": 114.584363,\n \"stddev_ts\": 0.948821,\n \"samples_ns\": [ 1111692619, 1127864657, 1111839275 ],\n \"samples_ts\": [ 115.14, 113.489, 115.125 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:16:35Z\",\n \"avg_ns\": 15242637061,\n \"stddev_ns\": 14805936,\n \"avg_ts\": 33.590009,\n \"stddev_ts\": 0.032607,\n \"samples_ns\": [ 15259709424, 15233335306, 15234866455 ],\n \"samples_ts\": [ 33.5524, 33.6105, 33.6071 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:16:30Z", + "avg_ns": 1117132183, + "stddev_ns": 9295003, + "avg_ts": 114.584363, + "stddev_ts": 0.948821, + "samples_ns": [ + 1111692619, + 1127864657, + 1111839275 + ], + "samples_ts": [ + 115.14, + 113.489, + 115.125 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T23:16:35Z", + "avg_ns": 15242637061, + "stddev_ns": 14805936, + "avg_ts": 33.590009, + "stddev_ts": 0.032607, + "samples_ns": [ + 15259709424, + 15233335306, + 15234866455 + ], + "samples_ts": [ + 33.5524, + 33.6105, + 33.6071 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 289 + }, + { + "timestamp_utc": "2025-12-08T23:17:51.401879+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:17:21Z\",\n \"avg_ns\": 4645609031,\n \"stddev_ns\": 115373,\n \"avg_ts\": 110.211599,\n \"stddev_ts\": 0.002208,\n \"samples_ns\": [ 4645699183, 4645614650, 4645513261 ],\n \"samples_ts\": [ 110.209, 110.211, 110.214 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:17:40Z\",\n \"avg_ns\": 3670013316,\n \"stddev_ns\": 242384,\n \"avg_ts\": 34.877258,\n \"stddev_ts\": 0.002230,\n \"samples_ns\": [ 3670186338, 3669746174, 3670107437 ],\n \"samples_ts\": [ 34.8756, 34.8798, 34.8764 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:17:21Z", + "avg_ns": 4645609031, + "stddev_ns": 115373, + "avg_ts": 110.211599, + "stddev_ts": 0.002208, + "samples_ns": [ + 4645699183, + 4645614650, + 4645513261 + ], + "samples_ts": [ + 110.209, + 110.211, + 110.214 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T23:17:40Z", + "avg_ns": 3670013316, + "stddev_ns": 242384, + "avg_ts": 34.877258, + "stddev_ts": 0.00223, + "samples_ns": [ + 3670186338, + 3669746174, + 3670107437 + ], + "samples_ts": [ + 34.8756, + 34.8798, + 34.8764 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 290 + }, + { + "timestamp_utc": "2025-12-08T23:18:56.463452+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:17:52Z\",\n \"avg_ns\": 4639252027,\n \"stddev_ns\": 345771,\n \"avg_ts\": 110.362619,\n \"stddev_ts\": 0.007900,\n \"samples_ns\": [ 4639587045, 4638922958, 4639246080 ],\n \"samples_ts\": [ 110.355, 110.37, 110.363 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:18:10Z\",\n \"avg_ns\": 15199574899,\n \"stddev_ns\": 2319138,\n \"avg_ts\": 33.685153,\n \"stddev_ts\": 0.005125,\n \"samples_ns\": [ 15198872519, 15202157233, 15197694947 ],\n \"samples_ts\": [ 33.6867, 33.6794, 33.6893 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:17:52Z", + "avg_ns": 4639252027, + "stddev_ns": 345771, + "avg_ts": 110.362619, + "stddev_ts": 0.0079, + "samples_ns": [ + 4639587045, + 4638922958, + 4639246080 + ], + "samples_ts": [ + 110.355, + 110.37, + 110.363 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T23:18:10Z", + "avg_ns": 15199574899, + "stddev_ns": 2319138, + "avg_ts": 33.685153, + "stddev_ts": 0.005125, + "samples_ns": [ + 15198872519, + 15202157233, + 15197694947 + ], + "samples_ts": [ + 33.6867, + 33.6794, + 33.6893 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 291 + }, + { + "timestamp_utc": "2025-12-08T23:19:12.899087+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:18:57Z\",\n \"avg_ns\": 1112217658,\n \"stddev_ns\": 76592,\n \"avg_ts\": 115.085388,\n \"stddev_ts\": 0.007925,\n \"samples_ns\": [ 1112156636, 1112303609, 1112192729 ],\n \"samples_ts\": [ 115.092, 115.076, 115.088 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:19:01Z\",\n \"avg_ns\": 3707903467,\n \"stddev_ns\": 229619,\n \"avg_ts\": 34.520856,\n \"stddev_ts\": 0.002138,\n \"samples_ns\": [ 3707866257, 3707694725, 3708149419 ],\n \"samples_ts\": [ 34.5212, 34.5228, 34.5186 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:18:57Z", + "avg_ns": 1112217658, + "stddev_ns": 76592, + "avg_ts": 115.085388, + "stddev_ts": 0.007925, + "samples_ns": [ + 1112156636, + 1112303609, + 1112192729 + ], + "samples_ts": [ + 115.092, + 115.076, + 115.088 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T23:19:01Z", + "avg_ns": 3707903467, + "stddev_ns": 229619, + "avg_ts": 34.520856, + "stddev_ts": 0.002138, + "samples_ns": [ + 3707866257, + 3707694725, + 3708149419 + ], + "samples_ts": [ + 34.5212, + 34.5228, + 34.5186 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 292 + }, + { + "timestamp_utc": "2025-12-08T23:20:03.970023+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:19:13Z\",\n \"avg_ns\": 1111542378,\n \"stddev_ns\": 187875,\n \"avg_ts\": 115.155306,\n \"stddev_ts\": 0.019463,\n \"samples_ns\": [ 1111363819, 1111524958, 1111738357 ],\n \"samples_ts\": [ 115.174, 115.157, 115.135 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:19:18Z\",\n \"avg_ns\": 15253750223,\n \"stddev_ns\": 18844710,\n \"avg_ts\": 33.565550,\n \"stddev_ts\": 0.041495,\n \"samples_ns\": [ 15231993404, 15264358079, 15264899188 ],\n \"samples_ts\": [ 33.6135, 33.5422, 33.541 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:19:13Z", + "avg_ns": 1111542378, + "stddev_ns": 187875, + "avg_ts": 115.155306, + "stddev_ts": 0.019463, + "samples_ns": [ + 1111363819, + 1111524958, + 1111738357 + ], + "samples_ts": [ + 115.174, + 115.157, + 115.135 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T23:19:18Z", + "avg_ns": 15253750223, + "stddev_ns": 18844710, + "avg_ts": 33.56555, + "stddev_ts": 0.041495, + "samples_ns": [ + 15231993404, + 15264358079, + 15264899188 + ], + "samples_ts": [ + 33.6135, + 33.5422, + 33.541 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 293 + }, + { + "timestamp_utc": "2025-12-08T23:20:34.492023+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:20:04Z\",\n \"avg_ns\": 4672640537,\n \"stddev_ns\": 371087,\n \"avg_ts\": 109.574019,\n \"stddev_ts\": 0.008702,\n \"samples_ns\": [ 4672482083, 4672374981, 4673064547 ],\n \"samples_ts\": [ 109.578, 109.58, 109.564 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:20:23Z\",\n \"avg_ns\": 3656846708,\n \"stddev_ns\": 733114,\n \"avg_ts\": 35.002835,\n \"stddev_ts\": 0.006970,\n \"samples_ns\": [ 3657360580, 3657166050, 3656013496 ],\n \"samples_ts\": [ 34.9979, 34.9998, 35.0108 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:20:04Z", + "avg_ns": 4672640537, + "stddev_ns": 371087, + "avg_ts": 109.574019, + "stddev_ts": 0.008702, + "samples_ns": [ + 4672482083, + 4672374981, + 4673064547 + ], + "samples_ts": [ + 109.578, + 109.58, + 109.564 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T23:20:23Z", + "avg_ns": 3656846708, + "stddev_ns": 733114, + "avg_ts": 35.002835, + "stddev_ts": 0.00697, + "samples_ns": [ + 3657360580, + 3657166050, + 3656013496 + ], + "samples_ts": [ + 34.9979, + 34.9998, + 35.0108 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 294 + }, + { + "timestamp_utc": "2025-12-08T23:21:39.167667+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:20:35Z\",\n \"avg_ns\": 4661433270,\n \"stddev_ns\": 179766,\n \"avg_ts\": 109.837462,\n \"stddev_ts\": 0.003573,\n \"samples_ns\": [ 4661555291, 4661263511, 4661481010 ],\n \"samples_ts\": [ 109.835, 109.841, 109.836 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:20:53Z\",\n \"avg_ns\": 15054601459,\n \"stddev_ns\": 15637534,\n \"avg_ts\": 34.009560,\n \"stddev_ts\": 0.035347,\n \"samples_ns\": [ 15036546193, 15063780587, 15063477598 ],\n \"samples_ts\": [ 34.0504, 33.9888, 33.9895 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:20:35Z", + "avg_ns": 4661433270, + "stddev_ns": 179766, + "avg_ts": 109.837462, + "stddev_ts": 0.003573, + "samples_ns": [ + 4661555291, + 4661263511, + 4661481010 + ], + "samples_ts": [ + 109.835, + 109.841, + 109.836 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T23:20:53Z", + "avg_ns": 15054601459, + "stddev_ns": 15637534, + "avg_ts": 34.00956, + "stddev_ts": 0.035347, + "samples_ns": [ + 15036546193, + 15063780587, + 15063477598 + ], + "samples_ts": [ + 34.0504, + 33.9888, + 33.9895 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 295 + }, + { + "timestamp_utc": "2025-12-08T23:21:55.475565+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:21:39Z\",\n \"avg_ns\": 1112267200,\n \"stddev_ns\": 192063,\n \"avg_ts\": 115.080264,\n \"stddev_ts\": 0.019870,\n \"samples_ns\": [ 1112151958, 1112160724, 1112488918 ],\n \"samples_ts\": [ 115.092, 115.091, 115.057 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:21:44Z\",\n \"avg_ns\": 3665181154,\n \"stddev_ns\": 1366883,\n \"avg_ts\": 34.923243,\n \"stddev_ts\": 0.013027,\n \"samples_ns\": [ 3663616981, 3666145975, 3665780506 ],\n \"samples_ts\": [ 34.9382, 34.914, 34.9175 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:21:39Z", + "avg_ns": 1112267200, + "stddev_ns": 192063, + "avg_ts": 115.080264, + "stddev_ts": 0.01987, + "samples_ns": [ + 1112151958, + 1112160724, + 1112488918 + ], + "samples_ts": [ + 115.092, + 115.091, + 115.057 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T23:21:44Z", + "avg_ns": 3665181154, + "stddev_ns": 1366883, + "avg_ts": 34.923243, + "stddev_ts": 0.013027, + "samples_ns": [ + 3663616981, + 3666145975, + 3665780506 + ], + "samples_ts": [ + 34.9382, + 34.914, + 34.9175 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 296 + }, + { + "timestamp_utc": "2025-12-08T23:22:46.491338+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:21:56Z\",\n \"avg_ns\": 1111870249,\n \"stddev_ns\": 118454,\n \"avg_ts\": 115.121347,\n \"stddev_ts\": 0.011769,\n \"samples_ns\": [ 1111747810, 1111972410, 1111890528 ],\n \"samples_ts\": [ 115.134, 115.111, 115.119 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:22:00Z\",\n \"avg_ns\": 15234870471,\n \"stddev_ns\": 30233142,\n \"avg_ts\": 33.607200,\n \"stddev_ts\": 0.066615,\n \"samples_ns\": [ 15269779130, 15217188997, 15217643288 ],\n \"samples_ts\": [ 33.5303, 33.6462, 33.6452 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:21:56Z", + "avg_ns": 1111870249, + "stddev_ns": 118454, + "avg_ts": 115.121347, + "stddev_ts": 0.011769, + "samples_ns": [ + 1111747810, + 1111972410, + 1111890528 + ], + "samples_ts": [ + 115.134, + 115.111, + 115.119 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T23:22:00Z", + "avg_ns": 15234870471, + "stddev_ns": 30233142, + "avg_ts": 33.6072, + "stddev_ts": 0.066615, + "samples_ns": [ + 15269779130, + 15217188997, + 15217643288 + ], + "samples_ts": [ + 33.5303, + 33.6462, + 33.6452 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 297 + }, + { + "timestamp_utc": "2025-12-08T23:23:18.218018+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:22:47Z\",\n \"avg_ns\": 4942523695,\n \"stddev_ns\": 1306194,\n \"avg_ts\": 103.590808,\n \"stddev_ts\": 0.027334,\n \"samples_ns\": [ 4941499657, 4942079310, 4943992119 ],\n \"samples_ts\": [ 103.612, 103.6, 103.56 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:23:07Z\",\n \"avg_ns\": 3697233377,\n \"stddev_ns\": 416004,\n \"avg_ts\": 34.620482,\n \"stddev_ts\": 0.003853,\n \"samples_ns\": [ 3697707813, 3697019518, 3696972801 ],\n \"samples_ts\": [ 34.616, 34.6225, 34.6229 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:22:47Z", + "avg_ns": 4942523695, + "stddev_ns": 1306194, + "avg_ts": 103.590808, + "stddev_ts": 0.027334, + "samples_ns": [ + 4941499657, + 4942079310, + 4943992119 + ], + "samples_ts": [ + 103.612, + 103.6, + 103.56 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T23:23:07Z", + "avg_ns": 3697233377, + "stddev_ns": 416004, + "avg_ts": 34.620482, + "stddev_ts": 0.003853, + "samples_ns": [ + 3697707813, + 3697019518, + 3696972801 + ], + "samples_ts": [ + 34.616, + 34.6225, + 34.6229 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 298 + }, + { + "timestamp_utc": "2025-12-08T23:24:24.629492+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:23:18Z\",\n \"avg_ns\": 4925830120,\n \"stddev_ns\": 1416635,\n \"avg_ts\": 103.941877,\n \"stddev_ts\": 0.029854,\n \"samples_ns\": [ 4925587570, 4927350612, 4924552179 ],\n \"samples_ts\": [ 103.947, 103.91, 103.969 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:23:38Z\",\n \"avg_ns\": 15280506182,\n \"stddev_ns\": 4913095,\n \"avg_ts\": 33.506746,\n \"stddev_ts\": 0.010767,\n \"samples_ns\": [ 15285381540, 15275562289, 15280574719 ],\n \"samples_ts\": [ 33.4961, 33.5176, 33.5066 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:23:18Z", + "avg_ns": 4925830120, + "stddev_ns": 1416635, + "avg_ts": 103.941877, + "stddev_ts": 0.029854, + "samples_ns": [ + 4925587570, + 4927350612, + 4924552179 + ], + "samples_ts": [ + 103.947, + 103.91, + 103.969 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T23:23:38Z", + "avg_ns": 15280506182, + "stddev_ns": 4913095, + "avg_ts": 33.506746, + "stddev_ts": 0.010767, + "samples_ns": [ + 15285381540, + 15275562289, + 15280574719 + ], + "samples_ts": [ + 33.4961, + 33.5176, + 33.5066 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 299 + }, + { + "timestamp_utc": "2025-12-08T23:24:41.052070+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:24:25Z\",\n \"avg_ns\": 1111813886,\n \"stddev_ns\": 44018,\n \"avg_ts\": 115.127182,\n \"stddev_ts\": 0.002976,\n \"samples_ns\": [ 1111783091, 1111839985, 1111818583 ],\n \"samples_ts\": [ 115.13, 115.124, 115.127 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:24:29Z\",\n \"avg_ns\": 3704509332,\n \"stddev_ns\": 784317,\n \"avg_ts\": 34.552485,\n \"stddev_ts\": 0.007293,\n \"samples_ns\": [ 3704358094, 3705355857, 3703814046 ],\n \"samples_ts\": [ 34.5539, 34.5446, 34.559 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:24:25Z", + "avg_ns": 1111813886, + "stddev_ns": 44018, + "avg_ts": 115.127182, + "stddev_ts": 0.002976, + "samples_ns": [ + 1111783091, + 1111839985, + 1111818583 + ], + "samples_ts": [ + 115.13, + 115.124, + 115.127 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T23:24:29Z", + "avg_ns": 3704509332, + "stddev_ns": 784317, + "avg_ts": 34.552485, + "stddev_ts": 0.007293, + "samples_ns": [ + 3704358094, + 3705355857, + 3703814046 + ], + "samples_ts": [ + 34.5539, + 34.5446, + 34.559 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 300 + }, + { + "timestamp_utc": "2025-12-08T23:25:31.741072+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:24:41Z\",\n \"avg_ns\": 1112143409,\n \"stddev_ns\": 39970,\n \"avg_ts\": 115.093071,\n \"stddev_ts\": 0.004136,\n \"samples_ns\": [ 1112135645, 1112107890, 1112186692 ],\n \"samples_ts\": [ 115.094, 115.097, 115.089 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:24:46Z\",\n \"avg_ns\": 15124610929,\n \"stddev_ns\": 1018337,\n \"avg_ts\": 33.852111,\n \"stddev_ts\": 0.002246,\n \"samples_ns\": [ 15124025589, 15124037693, 15125769507 ],\n \"samples_ts\": [ 33.8534, 33.8534, 33.8495 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:24:41Z", + "avg_ns": 1112143409, + "stddev_ns": 39970, + "avg_ts": 115.093071, + "stddev_ts": 0.004136, + "samples_ns": [ + 1112135645, + 1112107890, + 1112186692 + ], + "samples_ts": [ + 115.094, + 115.097, + 115.089 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T23:24:46Z", + "avg_ns": 15124610929, + "stddev_ns": 1018337, + "avg_ts": 33.852111, + "stddev_ts": 0.002246, + "samples_ns": [ + 15124025589, + 15124037693, + 15125769507 + ], + "samples_ts": [ + 33.8534, + 33.8534, + 33.8495 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 301 + }, + { + "timestamp_utc": "2025-12-08T23:26:02.144620+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:25:32Z\",\n \"avg_ns\": 4634149526,\n \"stddev_ns\": 94654,\n \"avg_ts\": 110.484135,\n \"stddev_ts\": 0.001568,\n \"samples_ns\": [ 4634089244, 4634139669, 4634219666 ],\n \"samples_ts\": [ 110.486, 110.484, 110.482 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:25:51Z\",\n \"avg_ns\": 3656811954,\n \"stddev_ns\": 96609,\n \"avg_ts\": 35.003167,\n \"stddev_ts\": 0.000925,\n \"samples_ns\": [ 3656701225, 3656855584, 3656879053 ],\n \"samples_ts\": [ 35.0042, 35.0027, 35.0025 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:25:32Z", + "avg_ns": 4634149526, + "stddev_ns": 94654, + "avg_ts": 110.484135, + "stddev_ts": 0.001568, + "samples_ns": [ + 4634089244, + 4634139669, + 4634219666 + ], + "samples_ts": [ + 110.486, + 110.484, + 110.482 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T23:25:51Z", + "avg_ns": 3656811954, + "stddev_ns": 96609, + "avg_ts": 35.003167, + "stddev_ts": 0.000925, + "samples_ns": [ + 3656701225, + 3656855584, + 3656879053 + ], + "samples_ts": [ + 35.0042, + 35.0027, + 35.0025 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 302 + }, + { + "timestamp_utc": "2025-12-08T23:27:07.251859+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:26:02Z\",\n \"avg_ns\": 4637255941,\n \"stddev_ns\": 74999,\n \"avg_ts\": 110.410123,\n \"stddev_ts\": 0.001786,\n \"samples_ns\": [ 4637224892, 4637341479, 4637201452 ],\n \"samples_ts\": [ 110.411, 110.408, 110.411 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:26:21Z\",\n \"avg_ns\": 15230074317,\n \"stddev_ns\": 13732462,\n \"avg_ts\": 33.617714,\n \"stddev_ts\": 0.030296,\n \"samples_ns\": [ 15245909960, 15222867006, 15221445985 ],\n \"samples_ts\": [ 33.5828, 33.6336, 33.6368 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:26:02Z", + "avg_ns": 4637255941, + "stddev_ns": 74999, + "avg_ts": 110.410123, + "stddev_ts": 0.001786, + "samples_ns": [ + 4637224892, + 4637341479, + 4637201452 + ], + "samples_ts": [ + 110.411, + 110.408, + 110.411 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T23:26:21Z", + "avg_ns": 15230074317, + "stddev_ns": 13732462, + "avg_ts": 33.617714, + "stddev_ts": 0.030296, + "samples_ns": [ + 15245909960, + 15222867006, + 15221445985 + ], + "samples_ts": [ + 33.5828, + 33.6336, + 33.6368 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 303 + }, + { + "timestamp_utc": "2025-12-08T23:27:23.709940+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:27:08Z\",\n \"avg_ns\": 1112574223,\n \"stddev_ns\": 78652,\n \"avg_ts\": 115.048504,\n \"stddev_ts\": 0.007365,\n \"samples_ns\": [ 1112526602, 1112539958, 1112656110 ],\n \"samples_ts\": [ 115.053, 115.052, 115.04 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:27:12Z\",\n \"avg_ns\": 3706006755,\n \"stddev_ns\": 3079143,\n \"avg_ts\": 34.538539,\n \"stddev_ts\": 0.028693,\n \"samples_ns\": [ 3709226272, 3705703580, 3703090413 ],\n \"samples_ts\": [ 34.5085, 34.5413, 34.5657 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:27:08Z", + "avg_ns": 1112574223, + "stddev_ns": 78652, + "avg_ts": 115.048504, + "stddev_ts": 0.007365, + "samples_ns": [ + 1112526602, + 1112539958, + 1112656110 + ], + "samples_ts": [ + 115.053, + 115.052, + 115.04 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T23:27:12Z", + "avg_ns": 3706006755, + "stddev_ns": 3079143, + "avg_ts": 34.538539, + "stddev_ts": 0.028693, + "samples_ns": [ + 3709226272, + 3705703580, + 3703090413 + ], + "samples_ts": [ + 34.5085, + 34.5413, + 34.5657 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 304 + }, + { + "timestamp_utc": "2025-12-08T23:28:14.403696+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:27:24Z\",\n \"avg_ns\": 1111969189,\n \"stddev_ns\": 119251,\n \"avg_ts\": 115.111104,\n \"stddev_ts\": 0.012345,\n \"samples_ns\": [ 1112002056, 1112068560, 1111836951 ],\n \"samples_ts\": [ 115.108, 115.101, 115.125 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:27:28Z\",\n \"avg_ns\": 15112326543,\n \"stddev_ns\": 6862407,\n \"avg_ts\": 33.879633,\n \"stddev_ts\": 0.015389,\n \"samples_ns\": [ 15104404167, 15116147726, 15116427736 ],\n \"samples_ts\": [ 33.8974, 33.8711, 33.8704 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:27:24Z", + "avg_ns": 1111969189, + "stddev_ns": 119251, + "avg_ts": 115.111104, + "stddev_ts": 0.012345, + "samples_ns": [ + 1112002056, + 1112068560, + 1111836951 + ], + "samples_ts": [ + 115.108, + 115.101, + 115.125 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T23:27:28Z", + "avg_ns": 15112326543, + "stddev_ns": 6862407, + "avg_ts": 33.879633, + "stddev_ts": 0.015389, + "samples_ns": [ + 15104404167, + 15116147726, + 15116427736 + ], + "samples_ts": [ + 33.8974, + 33.8711, + 33.8704 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 305 + }, + { + "timestamp_utc": "2025-12-08T23:28:45.045158+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:28:15Z\",\n \"avg_ns\": 4689141948,\n \"stddev_ns\": 325040,\n \"avg_ts\": 109.188420,\n \"stddev_ts\": 0.007399,\n \"samples_ns\": [ 4689097531, 4688848749, 4689479565 ],\n \"samples_ts\": [ 109.189, 109.195, 109.181 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:28:33Z\",\n \"avg_ns\": 3672672987,\n \"stddev_ns\": 741183,\n \"avg_ts\": 34.852001,\n \"stddev_ts\": 0.006986,\n \"samples_ns\": [ 3673385647, 3672718025, 3671915291 ],\n \"samples_ts\": [ 34.8452, 34.8516, 34.8592 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:28:15Z", + "avg_ns": 4689141948, + "stddev_ns": 325040, + "avg_ts": 109.18842, + "stddev_ts": 0.007399, + "samples_ns": [ + 4689097531, + 4688848749, + 4689479565 + ], + "samples_ts": [ + 109.189, + 109.195, + 109.181 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T23:28:33Z", + "avg_ns": 3672672987, + "stddev_ns": 741183, + "avg_ts": 34.852001, + "stddev_ts": 0.006986, + "samples_ns": [ + 3673385647, + 3672718025, + 3671915291 + ], + "samples_ts": [ + 34.8452, + 34.8516, + 34.8592 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 306 + }, + { + "timestamp_utc": "2025-12-08T23:29:49.930220+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:28:45Z\",\n \"avg_ns\": 4658300511,\n \"stddev_ns\": 592065,\n \"avg_ts\": 109.911330,\n \"stddev_ts\": 0.013877,\n \"samples_ns\": [ 4657623537, 4658592501, 4658685496 ],\n \"samples_ts\": [ 109.927, 109.904, 109.902 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:29:04Z\",\n \"avg_ns\": 15127618181,\n \"stddev_ns\": 9158718,\n \"avg_ts\": 33.845389,\n \"stddev_ts\": 0.020484,\n \"samples_ns\": [ 15138115769, 15121259155, 15123479619 ],\n \"samples_ts\": [ 33.8219, 33.8596, 33.8546 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:28:45Z", + "avg_ns": 4658300511, + "stddev_ns": 592065, + "avg_ts": 109.91133, + "stddev_ts": 0.013877, + "samples_ns": [ + 4657623537, + 4658592501, + 4658685496 + ], + "samples_ts": [ + 109.927, + 109.904, + 109.902 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T23:29:04Z", + "avg_ns": 15127618181, + "stddev_ns": 9158718, + "avg_ts": 33.845389, + "stddev_ts": 0.020484, + "samples_ns": [ + 15138115769, + 15121259155, + 15123479619 + ], + "samples_ts": [ + 33.8219, + 33.8596, + 33.8546 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 307 + }, + { + "timestamp_utc": "2025-12-08T23:30:06.384562+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:29:50Z\",\n \"avg_ns\": 1112100370,\n \"stddev_ns\": 172271,\n \"avg_ts\": 115.097527,\n \"stddev_ts\": 0.017493,\n \"samples_ns\": [ 1112163554, 1111908868, 1112228689 ],\n \"samples_ts\": [ 115.091, 115.117, 115.084 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:29:55Z\",\n \"avg_ns\": 3715132451,\n \"stddev_ns\": 512835,\n \"avg_ts\": 34.453685,\n \"stddev_ts\": 0.004722,\n \"samples_ns\": [ 3714590567, 3715205767, 3715601020 ],\n \"samples_ts\": [ 34.4587, 34.453, 34.4493 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:29:50Z", + "avg_ns": 1112100370, + "stddev_ns": 172271, + "avg_ts": 115.097527, + "stddev_ts": 0.017493, + "samples_ns": [ + 1112163554, + 1111908868, + 1112228689 + ], + "samples_ts": [ + 115.091, + 115.117, + 115.084 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T23:29:55Z", + "avg_ns": 3715132451, + "stddev_ns": 512835, + "avg_ts": 34.453685, + "stddev_ts": 0.004722, + "samples_ns": [ + 3714590567, + 3715205767, + 3715601020 + ], + "samples_ts": [ + 34.4587, + 34.453, + 34.4493 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 308 + }, + { + "timestamp_utc": "2025-12-08T23:30:57.240394+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:30:07Z\",\n \"avg_ns\": 1112370618,\n \"stddev_ns\": 71388,\n \"avg_ts\": 115.069562,\n \"stddev_ts\": 0.005543,\n \"samples_ns\": [ 1112311758, 1112416576, 1112383522 ],\n \"samples_ts\": [ 115.076, 115.065, 115.068 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:30:11Z\",\n \"avg_ns\": 15180096222,\n \"stddev_ns\": 22661582,\n \"avg_ts\": 33.728427,\n \"stddev_ts\": 0.050394,\n \"samples_ns\": [ 15153949926, 15192269075, 15194069666 ],\n \"samples_ts\": [ 33.7866, 33.7014, 33.6974 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:30:07Z", + "avg_ns": 1112370618, + "stddev_ns": 71388, + "avg_ts": 115.069562, + "stddev_ts": 0.005543, + "samples_ns": [ + 1112311758, + 1112416576, + 1112383522 + ], + "samples_ts": [ + 115.076, + 115.065, + 115.068 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T23:30:11Z", + "avg_ns": 15180096222, + "stddev_ns": 22661582, + "avg_ts": 33.728427, + "stddev_ts": 0.050394, + "samples_ns": [ + 15153949926, + 15192269075, + 15194069666 + ], + "samples_ts": [ + 33.7866, + 33.7014, + 33.6974 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 309 + }, + { + "timestamp_utc": "2025-12-08T23:31:28.829531+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:30:58Z\",\n \"avg_ns\": 4929165791,\n \"stddev_ns\": 320718,\n \"avg_ts\": 103.871532,\n \"stddev_ts\": 0.006594,\n \"samples_ns\": [ 4929512794, 4928904981, 4929079599 ],\n \"samples_ts\": [ 103.864, 103.877, 103.873 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:31:17Z\",\n \"avg_ns\": 3660591877,\n \"stddev_ns\": 987515,\n \"avg_ts\": 34.967025,\n \"stddev_ts\": 0.009414,\n \"samples_ns\": [ 3661672758, 3660360141, 3659742733 ],\n \"samples_ts\": [ 34.9567, 34.9692, 34.9751 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:30:58Z", + "avg_ns": 4929165791, + "stddev_ns": 320718, + "avg_ts": 103.871532, + "stddev_ts": 0.006594, + "samples_ns": [ + 4929512794, + 4928904981, + 4929079599 + ], + "samples_ts": [ + 103.864, + 103.877, + 103.873 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T23:31:17Z", + "avg_ns": 3660591877, + "stddev_ns": 987515, + "avg_ts": 34.967025, + "stddev_ts": 0.009414, + "samples_ns": [ + 3661672758, + 3660360141, + 3659742733 + ], + "samples_ts": [ + 34.9567, + 34.9692, + 34.9751 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 310 + }, + { + "timestamp_utc": "2025-12-08T23:32:35.101215+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:31:29Z\",\n \"avg_ns\": 4946311521,\n \"stddev_ns\": 623956,\n \"avg_ts\": 103.511476,\n \"stddev_ts\": 0.013057,\n \"samples_ns\": [ 4946177566, 4946991576, 4945765421 ],\n \"samples_ts\": [ 103.514, 103.497, 103.523 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:31:49Z\",\n \"avg_ns\": 15203698717,\n \"stddev_ns\": 51627890,\n \"avg_ts\": 33.676274,\n \"stddev_ts\": 0.114132,\n \"samples_ns\": [ 15263306328, 15173105321, 15174684503 ],\n \"samples_ts\": [ 33.5445, 33.7439, 33.7404 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:31:29Z", + "avg_ns": 4946311521, + "stddev_ns": 623956, + "avg_ts": 103.511476, + "stddev_ts": 0.013057, + "samples_ns": [ + 4946177566, + 4946991576, + 4945765421 + ], + "samples_ts": [ + 103.514, + 103.497, + 103.523 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T23:31:49Z", + "avg_ns": 15203698717, + "stddev_ns": 51627890, + "avg_ts": 33.676274, + "stddev_ts": 0.114132, + "samples_ns": [ + 15263306328, + 15173105321, + 15174684503 + ], + "samples_ts": [ + 33.5445, + 33.7439, + 33.7404 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 311 + }, + { + "timestamp_utc": "2025-12-08T23:32:51.368566+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:32:35Z\",\n \"avg_ns\": 1112107771,\n \"stddev_ns\": 37408,\n \"avg_ts\": 115.096759,\n \"stddev_ts\": 0.003872,\n \"samples_ns\": [ 1112065911, 1112119469, 1112137933 ],\n \"samples_ts\": [ 115.101, 115.096, 115.094 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:32:40Z\",\n \"avg_ns\": 3641813770,\n \"stddev_ns\": 393708,\n \"avg_ts\": 35.147322,\n \"stddev_ts\": 0.003755,\n \"samples_ns\": [ 3642246100, 3641703356, 3641491855 ],\n \"samples_ts\": [ 35.1431, 35.1484, 35.1504 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:32:35Z", + "avg_ns": 1112107771, + "stddev_ns": 37408, + "avg_ts": 115.096759, + "stddev_ts": 0.003872, + "samples_ns": [ + 1112065911, + 1112119469, + 1112137933 + ], + "samples_ts": [ + 115.101, + 115.096, + 115.094 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T23:32:40Z", + "avg_ns": 3641813770, + "stddev_ns": 393708, + "avg_ts": 35.147322, + "stddev_ts": 0.003755, + "samples_ns": [ + 3642246100, + 3641703356, + 3641491855 + ], + "samples_ts": [ + 35.1431, + 35.1484, + 35.1504 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 312 + }, + { + "timestamp_utc": "2025-12-08T23:33:42.362476+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:32:52Z\",\n \"avg_ns\": 1112386484,\n \"stddev_ns\": 64178,\n \"avg_ts\": 115.067921,\n \"stddev_ts\": 0.005672,\n \"samples_ns\": [ 1112445107, 1112336459, 1112377887 ],\n \"samples_ts\": [ 115.062, 115.073, 115.069 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:32:56Z\",\n \"avg_ns\": 15224750527,\n \"stddev_ns\": 47592730,\n \"avg_ts\": 33.629670,\n \"stddev_ts\": 0.104938,\n \"samples_ns\": [ 15279668557, 15195542139, 15199040886 ],\n \"samples_ts\": [ 33.5086, 33.6941, 33.6863 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:32:52Z", + "avg_ns": 1112386484, + "stddev_ns": 64178, + "avg_ts": 115.067921, + "stddev_ts": 0.005672, + "samples_ns": [ + 1112445107, + 1112336459, + 1112377887 + ], + "samples_ts": [ + 115.062, + 115.073, + 115.069 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T23:32:56Z", + "avg_ns": 15224750527, + "stddev_ns": 47592730, + "avg_ts": 33.62967, + "stddev_ts": 0.104938, + "samples_ns": [ + 15279668557, + 15195542139, + 15199040886 + ], + "samples_ts": [ + 33.5086, + 33.6941, + 33.6863 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 313 + }, + { + "timestamp_utc": "2025-12-08T23:34:12.854259+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:33:43Z\",\n \"avg_ns\": 4636943869,\n \"stddev_ns\": 89453,\n \"avg_ts\": 110.417554,\n \"stddev_ts\": 0.002130,\n \"samples_ns\": [ 4636958724, 4636847918, 4637024965 ],\n \"samples_ts\": [ 110.417, 110.42, 110.416 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:34:01Z\",\n \"avg_ns\": 3681843010,\n \"stddev_ns\": 250044,\n \"avg_ts\": 34.765198,\n \"stddev_ts\": 0.002290,\n \"samples_ns\": [ 3681896735, 3682054215, 3681578081 ],\n \"samples_ts\": [ 34.7647, 34.7632, 34.7677 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:33:43Z", + "avg_ns": 4636943869, + "stddev_ns": 89453, + "avg_ts": 110.417554, + "stddev_ts": 0.00213, + "samples_ns": [ + 4636958724, + 4636847918, + 4637024965 + ], + "samples_ts": [ + 110.417, + 110.42, + 110.416 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T23:34:01Z", + "avg_ns": 3681843010, + "stddev_ns": 250044, + "avg_ts": 34.765198, + "stddev_ts": 0.00229, + "samples_ns": [ + 3681896735, + 3682054215, + 3681578081 + ], + "samples_ts": [ + 34.7647, + 34.7632, + 34.7677 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 314 + }, + { + "timestamp_utc": "2025-12-08T23:35:17.997683+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:34:13Z\",\n \"avg_ns\": 4651791912,\n \"stddev_ns\": 145416,\n \"avg_ts\": 110.065112,\n \"stddev_ts\": 0.002575,\n \"samples_ns\": [ 4651680947, 4651898457, 4651796334 ],\n \"samples_ts\": [ 110.068, 110.063, 110.065 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:34:32Z\",\n \"avg_ns\": 15223593996,\n \"stddev_ns\": 39495899,\n \"avg_ts\": 33.632157,\n \"stddev_ts\": 0.087384,\n \"samples_ns\": [ 15178034981, 15244586628, 15248160380 ],\n \"samples_ts\": [ 33.733, 33.5857, 33.5778 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:34:13Z", + "avg_ns": 4651791912, + "stddev_ns": 145416, + "avg_ts": 110.065112, + "stddev_ts": 0.002575, + "samples_ns": [ + 4651680947, + 4651898457, + 4651796334 + ], + "samples_ts": [ + 110.068, + 110.063, + 110.065 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T23:34:32Z", + "avg_ns": 15223593996, + "stddev_ns": 39495899, + "avg_ts": 33.632157, + "stddev_ts": 0.087384, + "samples_ns": [ + 15178034981, + 15244586628, + 15248160380 + ], + "samples_ts": [ + 33.733, + 33.5857, + 33.5778 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 315 + }, + { + "timestamp_utc": "2025-12-08T23:35:34.311766+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:35:18Z\",\n \"avg_ns\": 1112445077,\n \"stddev_ns\": 102762,\n \"avg_ts\": 115.061861,\n \"stddev_ts\": 0.010629,\n \"samples_ns\": [ 1112326587, 1112498831, 1112509813 ],\n \"samples_ts\": [ 115.074, 115.056, 115.055 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:35:23Z\",\n \"avg_ns\": 3650341210,\n \"stddev_ns\": 702512,\n \"avg_ts\": 35.065216,\n \"stddev_ts\": 0.006699,\n \"samples_ns\": [ 3650916510, 3650541410, 3649565712 ],\n \"samples_ts\": [ 35.0597, 35.0633, 35.0727 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:35:18Z", + "avg_ns": 1112445077, + "stddev_ns": 102762, + "avg_ts": 115.061861, + "stddev_ts": 0.010629, + "samples_ns": [ + 1112326587, + 1112498831, + 1112509813 + ], + "samples_ts": [ + 115.074, + 115.056, + 115.055 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T23:35:23Z", + "avg_ns": 3650341210, + "stddev_ns": 702512, + "avg_ts": 35.065216, + "stddev_ts": 0.006699, + "samples_ns": [ + 3650916510, + 3650541410, + 3649565712 + ], + "samples_ts": [ + 35.0597, + 35.0633, + 35.0727 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 316 + }, + { + "timestamp_utc": "2025-12-08T23:36:24.977345+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:35:35Z\",\n \"avg_ns\": 1111617126,\n \"stddev_ns\": 52373,\n \"avg_ts\": 115.147560,\n \"stddev_ts\": 0.005425,\n \"samples_ns\": [ 1111677600, 1111587231, 1111586547 ],\n \"samples_ts\": [ 115.141, 115.151, 115.151 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:35:39Z\",\n \"avg_ns\": 15117515271,\n \"stddev_ns\": 810239,\n \"avg_ts\": 33.868000,\n \"stddev_ts\": 0.001794,\n \"samples_ns\": [ 15118217321, 15117685510, 15116642983 ],\n \"samples_ts\": [ 33.8664, 33.8676, 33.87 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:35:35Z", + "avg_ns": 1111617126, + "stddev_ns": 52373, + "avg_ts": 115.14756, + "stddev_ts": 0.005425, + "samples_ns": [ + 1111677600, + 1111587231, + 1111586547 + ], + "samples_ts": [ + 115.141, + 115.151, + 115.151 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T23:35:39Z", + "avg_ns": 15117515271, + "stddev_ns": 810239, + "avg_ts": 33.868, + "stddev_ts": 0.001794, + "samples_ns": [ + 15118217321, + 15117685510, + 15116642983 + ], + "samples_ts": [ + 33.8664, + 33.8676, + 33.87 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 317 + }, + { + "timestamp_utc": "2025-12-08T23:36:55.659610+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:36:25Z\",\n \"avg_ns\": 4684162241,\n \"stddev_ns\": 297514,\n \"avg_ts\": 109.304498,\n \"stddev_ts\": 0.006942,\n \"samples_ns\": [ 4684070634, 4684494787, 4683921302 ],\n \"samples_ts\": [ 109.307, 109.297, 109.31 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:36:44Z\",\n \"avg_ns\": 3684840296,\n \"stddev_ns\": 479127,\n \"avg_ts\": 34.736920,\n \"stddev_ts\": 0.004481,\n \"samples_ns\": [ 3685177534, 3685046619, 3684296736 ],\n \"samples_ts\": [ 34.7337, 34.735, 34.742 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:36:25Z", + "avg_ns": 4684162241, + "stddev_ns": 297514, + "avg_ts": 109.304498, + "stddev_ts": 0.006942, + "samples_ns": [ + 4684070634, + 4684494787, + 4683921302 + ], + "samples_ts": [ + 109.307, + 109.297, + 109.31 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T23:36:44Z", + "avg_ns": 3684840296, + "stddev_ns": 479127, + "avg_ts": 34.73692, + "stddev_ts": 0.004481, + "samples_ns": [ + 3685177534, + 3685046619, + 3684296736 + ], + "samples_ts": [ + 34.7337, + 34.735, + 34.742 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 318 + }, + { + "timestamp_utc": "2025-12-08T23:38:00.720673+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:36:56Z\",\n \"avg_ns\": 4650866306,\n \"stddev_ns\": 497631,\n \"avg_ts\": 110.087018,\n \"stddev_ts\": 0.011779,\n \"samples_ns\": [ 4651395656, 4650795225, 4650408037 ],\n \"samples_ts\": [ 110.074, 110.089, 110.098 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:37:15Z\",\n \"avg_ns\": 15191805760,\n \"stddev_ns\": 17227477,\n \"avg_ts\": 33.702408,\n \"stddev_ts\": 0.038244,\n \"samples_ns\": [ 15171913285, 15201807395, 15201696600 ],\n \"samples_ts\": [ 33.7466, 33.6802, 33.6805 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:36:56Z", + "avg_ns": 4650866306, + "stddev_ns": 497631, + "avg_ts": 110.087018, + "stddev_ts": 0.011779, + "samples_ns": [ + 4651395656, + 4650795225, + 4650408037 + ], + "samples_ts": [ + 110.074, + 110.089, + 110.098 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T23:37:15Z", + "avg_ns": 15191805760, + "stddev_ns": 17227477, + "avg_ts": 33.702408, + "stddev_ts": 0.038244, + "samples_ns": [ + 15171913285, + 15201807395, + 15201696600 + ], + "samples_ts": [ + 33.7466, + 33.6802, + 33.6805 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 319 + }, + { + "timestamp_utc": "2025-12-08T23:38:17.024618+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:38:01Z\",\n \"avg_ns\": 1111818892,\n \"stddev_ns\": 65727,\n \"avg_ts\": 115.126664,\n \"stddev_ts\": 0.005865,\n \"samples_ns\": [ 1111834565, 1111866047, 1111756065 ],\n \"samples_ts\": [ 115.125, 115.122, 115.133 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:38:05Z\",\n \"avg_ns\": 3639488176,\n \"stddev_ns\": 1548240,\n \"avg_ts\": 35.169785,\n \"stddev_ts\": 0.014937,\n \"samples_ns\": [ 3641142008, 3639242983, 3638079539 ],\n \"samples_ts\": [ 35.1538, 35.1721, 35.1834 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:38:01Z", + "avg_ns": 1111818892, + "stddev_ns": 65727, + "avg_ts": 115.126664, + "stddev_ts": 0.005865, + "samples_ns": [ + 1111834565, + 1111866047, + 1111756065 + ], + "samples_ts": [ + 115.125, + 115.122, + 115.133 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T23:38:05Z", + "avg_ns": 3639488176, + "stddev_ns": 1548240, + "avg_ts": 35.169785, + "stddev_ts": 0.014937, + "samples_ns": [ + 3641142008, + 3639242983, + 3638079539 + ], + "samples_ts": [ + 35.1538, + 35.1721, + 35.1834 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 320 + }, + { + "timestamp_utc": "2025-12-08T23:39:08.070359+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:38:17Z\",\n \"avg_ns\": 1111722072,\n \"stddev_ns\": 84390,\n \"avg_ts\": 115.136691,\n \"stddev_ts\": 0.007248,\n \"samples_ns\": [ 1111650726, 1111724875, 1111790617 ],\n \"samples_ts\": [ 115.144, 115.136, 115.13 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:38:22Z\",\n \"avg_ns\": 15244130695,\n \"stddev_ns\": 19558784,\n \"avg_ts\": 33.586734,\n \"stddev_ts\": 0.043059,\n \"samples_ns\": [ 15266712471, 15232588520, 15233091096 ],\n \"samples_ts\": [ 33.537, 33.6121, 33.611 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:38:17Z", + "avg_ns": 1111722072, + "stddev_ns": 84390, + "avg_ts": 115.136691, + "stddev_ts": 0.007248, + "samples_ns": [ + 1111650726, + 1111724875, + 1111790617 + ], + "samples_ts": [ + 115.144, + 115.136, + 115.13 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T23:38:22Z", + "avg_ns": 15244130695, + "stddev_ns": 19558784, + "avg_ts": 33.586734, + "stddev_ts": 0.043059, + "samples_ns": [ + 15266712471, + 15232588520, + 15233091096 + ], + "samples_ts": [ + 33.537, + 33.6121, + 33.611 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 321 + }, + { + "timestamp_utc": "2025-12-08T23:39:39.761351+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:39:08Z\",\n \"avg_ns\": 4931410306,\n \"stddev_ns\": 440413,\n \"avg_ts\": 103.824255,\n \"stddev_ts\": 0.009154,\n \"samples_ns\": [ 4930908267, 4931660790, 4931661862 ],\n \"samples_ts\": [ 103.835, 103.819, 103.819 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:39:28Z\",\n \"avg_ns\": 3689737040,\n \"stddev_ns\": 1379575,\n \"avg_ts\": 34.690822,\n \"stddev_ts\": 0.012959,\n \"samples_ns\": [ 3690974184, 3689985444, 3688251493 ],\n \"samples_ts\": [ 34.6792, 34.6885, 34.7048 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:39:08Z", + "avg_ns": 4931410306, + "stddev_ns": 440413, + "avg_ts": 103.824255, + "stddev_ts": 0.009154, + "samples_ns": [ + 4930908267, + 4931660790, + 4931661862 + ], + "samples_ts": [ + 103.835, + 103.819, + 103.819 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T23:39:28Z", + "avg_ns": 3689737040, + "stddev_ns": 1379575, + "avg_ts": 34.690822, + "stddev_ts": 0.012959, + "samples_ns": [ + 3690974184, + 3689985444, + 3688251493 + ], + "samples_ts": [ + 34.6792, + 34.6885, + 34.7048 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 322 + }, + { + "timestamp_utc": "2025-12-08T23:40:45.797929+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:39:40Z\",\n \"avg_ns\": 4955884296,\n \"stddev_ns\": 534416,\n \"avg_ts\": 103.311533,\n \"stddev_ts\": 0.011140,\n \"samples_ns\": [ 4955788934, 4956459974, 4955403980 ],\n \"samples_ts\": [ 103.314, 103.3, 103.322 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:40:00Z\",\n \"avg_ns\": 15110937496,\n \"stddev_ns\": 14464535,\n \"avg_ts\": 33.882763,\n \"stddev_ts\": 0.032414,\n \"samples_ns\": [ 15127637724, 15102401684, 15102773081 ],\n \"samples_ts\": [ 33.8453, 33.9019, 33.9011 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:39:40Z", + "avg_ns": 4955884296, + "stddev_ns": 534416, + "avg_ts": 103.311533, + "stddev_ts": 0.01114, + "samples_ns": [ + 4955788934, + 4956459974, + 4955403980 + ], + "samples_ts": [ + 103.314, + 103.3, + 103.322 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T23:40:00Z", + "avg_ns": 15110937496, + "stddev_ns": 14464535, + "avg_ts": 33.882763, + "stddev_ts": 0.032414, + "samples_ns": [ + 15127637724, + 15102401684, + 15102773081 + ], + "samples_ts": [ + 33.8453, + 33.9019, + 33.9011 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 323 + }, + { + "timestamp_utc": "2025-12-08T23:40:57.991481+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:40:46Z\",\n \"avg_ns\": 586574549,\n \"stddev_ns\": 363258,\n \"avg_ts\": 218.216137,\n \"stddev_ts\": 0.134524,\n \"samples_ns\": [ 586535994, 586953923, 586233732 ],\n \"samples_ts\": [ 218.23, 218.075, 218.343 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:40:48Z\",\n \"avg_ns\": 2986188443,\n \"stddev_ns\": 349590,\n \"avg_ts\": 42.864007,\n \"stddev_ts\": 0.004956,\n \"samples_ns\": [ 2985843299, 2986533885, 2986188146 ],\n \"samples_ts\": [ 42.869, 42.859, 42.864 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:40:46Z", + "avg_ns": 586574549, + "stddev_ns": 363258, + "avg_ts": 218.216137, + "stddev_ts": 0.134524, + "samples_ns": [ + 586535994, + 586953923, + 586233732 + ], + "samples_ts": [ + 218.23, + 218.075, + 218.343 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T23:40:48Z", + "avg_ns": 2986188443, + "stddev_ns": 349590, + "avg_ts": 42.864007, + "stddev_ts": 0.004956, + "samples_ns": [ + 2985843299, + 2986533885, + 2986188146 + ], + "samples_ts": [ + 42.869, + 42.859, + 42.864 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 324 + }, + { + "timestamp_utc": "2025-12-08T23:41:38.005093+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:40:58Z\",\n \"avg_ns\": 586261718,\n \"stddev_ns\": 206341,\n \"avg_ts\": 218.332541,\n \"stddev_ts\": 0.076856,\n \"samples_ns\": [ 586031753, 586430682, 586322719 ],\n \"samples_ts\": [ 218.418, 218.27, 218.31 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:41:01Z\",\n \"avg_ns\": 12268017453,\n \"stddev_ns\": 10676933,\n \"avg_ts\": 41.734556,\n \"stddev_ts\": 0.036301,\n \"samples_ns\": [ 12280217823, 12260388994, 12263445544 ],\n \"samples_ts\": [ 41.6931, 41.7605, 41.7501 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:40:58Z", + "avg_ns": 586261718, + "stddev_ns": 206341, + "avg_ts": 218.332541, + "stddev_ts": 0.076856, + "samples_ns": [ + 586031753, + 586430682, + 586322719 + ], + "samples_ts": [ + 218.418, + 218.27, + 218.31 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T23:41:01Z", + "avg_ns": 12268017453, + "stddev_ns": 10676933, + "avg_ts": 41.734556, + "stddev_ts": 0.036301, + "samples_ns": [ + 12280217823, + 12260388994, + 12263445544 + ], + "samples_ts": [ + 41.6931, + 41.7605, + 41.7501 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 325 + }, + { + "timestamp_utc": "2025-12-08T23:41:57.673367+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:41:38Z\",\n \"avg_ns\": 2439993736,\n \"stddev_ns\": 2030331,\n \"avg_ts\": 209.836701,\n \"stddev_ts\": 0.174572,\n \"samples_ns\": [ 2442167943, 2439666147, 2438147118 ],\n \"samples_ts\": [ 209.65, 209.865, 209.996 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:41:48Z\",\n \"avg_ns\": 3002439838,\n \"stddev_ns\": 819677,\n \"avg_ts\": 42.631997,\n \"stddev_ts\": 0.011637,\n \"samples_ns\": [ 3003371064, 3001827647, 3002120803 ],\n \"samples_ts\": [ 42.6188, 42.6407, 42.6365 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:41:38Z", + "avg_ns": 2439993736, + "stddev_ns": 2030331, + "avg_ts": 209.836701, + "stddev_ts": 0.174572, + "samples_ns": [ + 2442167943, + 2439666147, + 2438147118 + ], + "samples_ts": [ + 209.65, + 209.865, + 209.996 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T23:41:48Z", + "avg_ns": 3002439838, + "stddev_ns": 819677, + "avg_ts": 42.631997, + "stddev_ts": 0.011637, + "samples_ns": [ + 3003371064, + 3001827647, + 3002120803 + ], + "samples_ts": [ + 42.6188, + 42.6407, + 42.6365 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 326 + }, + { + "timestamp_utc": "2025-12-08T23:42:44.961791+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:41:58Z\",\n \"avg_ns\": 2439221130,\n \"stddev_ns\": 311194,\n \"avg_ts\": 209.903071,\n \"stddev_ts\": 0.026779,\n \"samples_ns\": [ 2439182719, 2438930924, 2439549747 ],\n \"samples_ts\": [ 209.906, 209.928, 209.875 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:42:08Z\",\n \"avg_ns\": 12222942810,\n \"stddev_ns\": 2896972,\n \"avg_ts\": 41.888441,\n \"stddev_ts\": 0.009922,\n \"samples_ns\": [ 12219600761, 12224670875, 12224556795 ],\n \"samples_ts\": [ 41.8999, 41.8825, 41.8829 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:41:58Z", + "avg_ns": 2439221130, + "stddev_ns": 311194, + "avg_ts": 209.903071, + "stddev_ts": 0.026779, + "samples_ns": [ + 2439182719, + 2438930924, + 2439549747 + ], + "samples_ts": [ + 209.906, + 209.928, + 209.875 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T23:42:08Z", + "avg_ns": 12222942810, + "stddev_ns": 2896972, + "avg_ts": 41.888441, + "stddev_ts": 0.009922, + "samples_ns": [ + 12219600761, + 12224670875, + 12224556795 + ], + "samples_ts": [ + 41.8999, + 41.8825, + 41.8829 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 327 + }, + { + "timestamp_utc": "2025-12-08T23:42:57.182706+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:42:45Z\",\n \"avg_ns\": 586243447,\n \"stddev_ns\": 127294,\n \"avg_ts\": 218.339334,\n \"stddev_ts\": 0.047410,\n \"samples_ns\": [ 586114267, 586368769, 586247305 ],\n \"samples_ts\": [ 218.387, 218.293, 218.338 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:42:48Z\",\n \"avg_ns\": 2994040998,\n \"stddev_ns\": 1936859,\n \"avg_ts\": 42.751598,\n \"stddev_ts\": 0.027667,\n \"samples_ns\": [ 2995181115, 2995137230, 2991804649 ],\n \"samples_ts\": [ 42.7353, 42.7359, 42.7835 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:42:45Z", + "avg_ns": 586243447, + "stddev_ns": 127294, + "avg_ts": 218.339334, + "stddev_ts": 0.04741, + "samples_ns": [ + 586114267, + 586368769, + 586247305 + ], + "samples_ts": [ + 218.387, + 218.293, + 218.338 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T23:42:48Z", + "avg_ns": 2994040998, + "stddev_ns": 1936859, + "avg_ts": 42.751598, + "stddev_ts": 0.027667, + "samples_ns": [ + 2995181115, + 2995137230, + 2991804649 + ], + "samples_ts": [ + 42.7353, + 42.7359, + 42.7835 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 328 + }, + { + "timestamp_utc": "2025-12-08T23:43:37.071967+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:42:57Z\",\n \"avg_ns\": 585331078,\n \"stddev_ns\": 164631,\n \"avg_ts\": 218.679670,\n \"stddev_ts\": 0.061500,\n \"samples_ns\": [ 585285262, 585194207, 585513765 ],\n \"samples_ts\": [ 218.697, 218.731, 218.611 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:43:00Z\",\n \"avg_ns\": 12225516221,\n \"stddev_ns\": 18763613,\n \"avg_ts\": 41.879688,\n \"stddev_ts\": 0.064327,\n \"samples_ns\": [ 12233496097, 12238970542, 12204082025 ],\n \"samples_ts\": [ 41.8523, 41.8336, 41.9532 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:42:57Z", + "avg_ns": 585331078, + "stddev_ns": 164631, + "avg_ts": 218.67967, + "stddev_ts": 0.0615, + "samples_ns": [ + 585285262, + 585194207, + 585513765 + ], + "samples_ts": [ + 218.697, + 218.731, + 218.611 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T23:43:00Z", + "avg_ns": 12225516221, + "stddev_ns": 18763613, + "avg_ts": 41.879688, + "stddev_ts": 0.064327, + "samples_ns": [ + 12233496097, + 12238970542, + 12204082025 + ], + "samples_ts": [ + 41.8523, + 41.8336, + 41.9532 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 329 + }, + { + "timestamp_utc": "2025-12-08T23:43:56.913986+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:43:37Z\",\n \"avg_ns\": 2488648139,\n \"stddev_ns\": 16804119,\n \"avg_ts\": 205.740418,\n \"stddev_ts\": 1.384297,\n \"samples_ns\": [ 2507863512, 2481376157, 2476704748 ],\n \"samples_ts\": [ 204.158, 206.337, 206.726 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:43:47Z\",\n \"avg_ns\": 2989486601,\n \"stddev_ns\": 7572963,\n \"avg_ts\": 42.816900,\n \"stddev_ts\": 0.108565,\n \"samples_ns\": [ 2981097291, 2991545514, 2995816999 ],\n \"samples_ts\": [ 42.9372, 42.7872, 42.7262 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:43:37Z", + "avg_ns": 2488648139, + "stddev_ns": 16804119, + "avg_ts": 205.740418, + "stddev_ts": 1.384297, + "samples_ns": [ + 2507863512, + 2481376157, + 2476704748 + ], + "samples_ts": [ + 204.158, + 206.337, + 206.726 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T23:43:47Z", + "avg_ns": 2989486601, + "stddev_ns": 7572963, + "avg_ts": 42.8169, + "stddev_ts": 0.108565, + "samples_ns": [ + 2981097291, + 2991545514, + 2995816999 + ], + "samples_ts": [ + 42.9372, + 42.7872, + 42.7262 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 330 + }, + { + "timestamp_utc": "2025-12-08T23:44:43.892533+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:43:57Z\",\n \"avg_ns\": 2425645219,\n \"stddev_ns\": 205179,\n \"avg_ts\": 211.077860,\n \"stddev_ts\": 0.017332,\n \"samples_ns\": [ 2425584692, 2425483323, 2425867643 ],\n \"samples_ts\": [ 211.083, 211.092, 211.059 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:44:07Z\",\n \"avg_ns\": 12129608203,\n \"stddev_ns\": 1536761,\n \"avg_ts\": 42.210762,\n \"stddev_ts\": 0.005321,\n \"samples_ns\": [ 12130744190, 12127869942, 12130210479 ],\n \"samples_ts\": [ 42.2068, 42.2168, 42.2087 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:43:57Z", + "avg_ns": 2425645219, + "stddev_ns": 205179, + "avg_ts": 211.07786, + "stddev_ts": 0.017332, + "samples_ns": [ + 2425584692, + 2425483323, + 2425867643 + ], + "samples_ts": [ + 211.083, + 211.092, + 211.059 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T23:44:07Z", + "avg_ns": 12129608203, + "stddev_ns": 1536761, + "avg_ts": 42.210762, + "stddev_ts": 0.005321, + "samples_ns": [ + 12130744190, + 12127869942, + 12130210479 + ], + "samples_ts": [ + 42.2068, + 42.2168, + 42.2087 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 331 + }, + { + "timestamp_utc": "2025-12-08T23:44:56.042237+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:44:44Z\",\n \"avg_ns\": 580105206,\n \"stddev_ns\": 127733,\n \"avg_ts\": 220.649639,\n \"stddev_ts\": 0.048591,\n \"samples_ns\": [ 580167873, 580189503, 579958242 ],\n \"samples_ts\": [ 220.626, 220.618, 220.706 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:44:47Z\",\n \"avg_ns\": 2978363818,\n \"stddev_ns\": 199792,\n \"avg_ts\": 42.976617,\n \"stddev_ts\": 0.002659,\n \"samples_ns\": [ 2978515539, 2978417171, 2978158746 ],\n \"samples_ts\": [ 42.9744, 42.9758, 42.9796 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:44:44Z", + "avg_ns": 580105206, + "stddev_ns": 127733, + "avg_ts": 220.649639, + "stddev_ts": 0.048591, + "samples_ns": [ + 580167873, + 580189503, + 579958242 + ], + "samples_ts": [ + 220.626, + 220.618, + 220.706 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T23:44:47Z", + "avg_ns": 2978363818, + "stddev_ns": 199792, + "avg_ts": 42.976617, + "stddev_ts": 0.002659, + "samples_ns": [ + 2978515539, + 2978417171, + 2978158746 + ], + "samples_ts": [ + 42.9744, + 42.9758, + 42.9796 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 332 + }, + { + "timestamp_utc": "2025-12-08T23:45:36.330774+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:44:56Z\",\n \"avg_ns\": 587596599,\n \"stddev_ns\": 1430537,\n \"avg_ts\": 217.837382,\n \"stddev_ts\": 0.529900,\n \"samples_ns\": [ 586287076, 587380085, 589122638 ],\n \"samples_ts\": [ 218.323, 217.917, 217.272 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:44:59Z\",\n \"avg_ns\": 12342743218,\n \"stddev_ns\": 2588591,\n \"avg_ts\": 41.481866,\n \"stddev_ts\": 0.008692,\n \"samples_ns\": [ 12343342258, 12339910059, 12344977338 ],\n \"samples_ts\": [ 41.4799, 41.4914, 41.4744 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:44:56Z", + "avg_ns": 587596599, + "stddev_ns": 1430537, + "avg_ts": 217.837382, + "stddev_ts": 0.5299, + "samples_ns": [ + 586287076, + 587380085, + 589122638 + ], + "samples_ts": [ + 218.323, + 217.917, + 217.272 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T23:44:59Z", + "avg_ns": 12342743218, + "stddev_ns": 2588591, + "avg_ts": 41.481866, + "stddev_ts": 0.008692, + "samples_ns": [ + 12343342258, + 12339910059, + 12344977338 + ], + "samples_ts": [ + 41.4799, + 41.4914, + 41.4744 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 333 + }, + { + "timestamp_utc": "2025-12-08T23:45:56.469966+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:45:37Z\",\n \"avg_ns\": 2584763660,\n \"stddev_ns\": 854087,\n \"avg_ts\": 198.083889,\n \"stddev_ts\": 0.065348,\n \"samples_ns\": [ 2583790798, 2585380610, 2585119573 ],\n \"samples_ts\": [ 198.158, 198.037, 198.057 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:45:47Z\",\n \"avg_ns\": 2967353437,\n \"stddev_ns\": 1098261,\n \"avg_ts\": 43.136086,\n \"stddev_ts\": 0.015968,\n \"samples_ns\": [ 2966096972, 2967832843, 2968130496 ],\n \"samples_ts\": [ 43.1544, 43.1291, 43.1248 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:45:37Z", + "avg_ns": 2584763660, + "stddev_ns": 854087, + "avg_ts": 198.083889, + "stddev_ts": 0.065348, + "samples_ns": [ + 2583790798, + 2585380610, + 2585119573 + ], + "samples_ts": [ + 198.158, + 198.037, + 198.057 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T23:45:47Z", + "avg_ns": 2967353437, + "stddev_ns": 1098261, + "avg_ts": 43.136086, + "stddev_ts": 0.015968, + "samples_ns": [ + 2966096972, + 2967832843, + 2968130496 + ], + "samples_ts": [ + 43.1544, + 43.1291, + 43.1248 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 334 + }, + { + "timestamp_utc": "2025-12-08T23:46:44.021004+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:45:57Z\",\n \"avg_ns\": 2576471654,\n \"stddev_ns\": 240794,\n \"avg_ts\": 198.721380,\n \"stddev_ts\": 0.018571,\n \"samples_ns\": [ 2576749698, 2576333418, 2576331846 ],\n \"samples_ts\": [ 198.7, 198.732, 198.732 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:46:07Z\",\n \"avg_ns\": 12123848814,\n \"stddev_ns\": 2840998,\n \"avg_ts\": 42.230815,\n \"stddev_ts\": 0.009887,\n \"samples_ns\": [ 12122319850, 12122102151, 12127124442 ],\n \"samples_ts\": [ 42.2361, 42.2369, 42.2194 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:45:57Z", + "avg_ns": 2576471654, + "stddev_ns": 240794, + "avg_ts": 198.72138, + "stddev_ts": 0.018571, + "samples_ns": [ + 2576749698, + 2576333418, + 2576331846 + ], + "samples_ts": [ + 198.7, + 198.732, + 198.732 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T23:46:07Z", + "avg_ns": 12123848814, + "stddev_ns": 2840998, + "avg_ts": 42.230815, + "stddev_ts": 0.009887, + "samples_ns": [ + 12122319850, + 12122102151, + 12127124442 + ], + "samples_ts": [ + 42.2361, + 42.2369, + 42.2194 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 335 + }, + { + "timestamp_utc": "2025-12-08T23:46:56.205967+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:46:44Z\",\n \"avg_ns\": 583080444,\n \"stddev_ns\": 4547406,\n \"avg_ts\": 219.532680,\n \"stddev_ts\": 1.719516,\n \"samples_ns\": [ 577852708, 585269087, 586119539 ],\n \"samples_ts\": [ 221.51, 218.703, 218.385 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:46:47Z\",\n \"avg_ns\": 2981780504,\n \"stddev_ns\": 1390395,\n \"avg_ts\": 42.927378,\n \"stddev_ts\": 0.020021,\n \"samples_ns\": [ 2982911079, 2982202409, 2980228024 ],\n \"samples_ts\": [ 42.9111, 42.9213, 42.9497 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:46:44Z", + "avg_ns": 583080444, + "stddev_ns": 4547406, + "avg_ts": 219.53268, + "stddev_ts": 1.719516, + "samples_ns": [ + 577852708, + 585269087, + 586119539 + ], + "samples_ts": [ + 221.51, + 218.703, + 218.385 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T23:46:47Z", + "avg_ns": 2981780504, + "stddev_ns": 1390395, + "avg_ts": 42.927378, + "stddev_ts": 0.020021, + "samples_ns": [ + 2982911079, + 2982202409, + 2980228024 + ], + "samples_ts": [ + 42.9111, + 42.9213, + 42.9497 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 336 + }, + { + "timestamp_utc": "2025-12-08T23:47:35.836929+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:46:56Z\",\n \"avg_ns\": 580141803,\n \"stddev_ns\": 48716,\n \"avg_ts\": 220.635713,\n \"stddev_ts\": 0.013246,\n \"samples_ns\": [ 580137137, 580109544, 580178730 ],\n \"samples_ts\": [ 220.637, 220.648, 220.622 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:46:59Z\",\n \"avg_ns\": 12147107371,\n \"stddev_ns\": 5532943,\n \"avg_ts\": 42.149958,\n \"stddev_ts\": 0.019202,\n \"samples_ns\": [ 12148367460, 12151901586, 12141053067 ],\n \"samples_ts\": [ 42.1456, 42.1333, 42.171 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:46:56Z", + "avg_ns": 580141803, + "stddev_ns": 48716, + "avg_ts": 220.635713, + "stddev_ts": 0.013246, + "samples_ns": [ + 580137137, + 580109544, + 580178730 + ], + "samples_ts": [ + 220.637, + 220.648, + 220.622 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T23:46:59Z", + "avg_ns": 12147107371, + "stddev_ns": 5532943, + "avg_ts": 42.149958, + "stddev_ts": 0.019202, + "samples_ns": [ + 12148367460, + 12151901586, + 12141053067 + ], + "samples_ts": [ + 42.1456, + 42.1333, + 42.171 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 337 + }, + { + "timestamp_utc": "2025-12-08T23:47:55.288296+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:47:36Z\",\n \"avg_ns\": 2400699959,\n \"stddev_ns\": 764637,\n \"avg_ts\": 213.271147,\n \"stddev_ts\": 0.067639,\n \"samples_ns\": [ 2401560596, 2400425679, 2400113604 ],\n \"samples_ts\": [ 213.195, 213.296, 213.323 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:47:46Z\",\n \"avg_ns\": 2979619210,\n \"stddev_ns\": 729747,\n \"avg_ts\": 42.958511,\n \"stddev_ts\": 0.010490,\n \"samples_ns\": [ 2979214964, 2980459292, 2979183375 ],\n \"samples_ts\": [ 42.9643, 42.9464, 42.9648 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:47:36Z", + "avg_ns": 2400699959, + "stddev_ns": 764637, + "avg_ts": 213.271147, + "stddev_ts": 0.067639, + "samples_ns": [ + 2401560596, + 2400425679, + 2400113604 + ], + "samples_ts": [ + 213.195, + 213.296, + 213.323 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T23:47:46Z", + "avg_ns": 2979619210, + "stddev_ns": 729747, + "avg_ts": 42.958511, + "stddev_ts": 0.01049, + "samples_ns": [ + 2979214964, + 2980459292, + 2979183375 + ], + "samples_ts": [ + 42.9643, + 42.9464, + 42.9648 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 338 + }, + { + "timestamp_utc": "2025-12-08T23:48:42.266772+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:47:56Z\",\n \"avg_ns\": 2402519639,\n \"stddev_ns\": 290163,\n \"avg_ts\": 213.109602,\n \"stddev_ts\": 0.024995,\n \"samples_ns\": [ 2402650321, 2402712333, 2402196265 ],\n \"samples_ts\": [ 213.098, 213.093, 213.138 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:48:05Z\",\n \"avg_ns\": 12166249154,\n \"stddev_ns\": 4253941,\n \"avg_ts\": 42.083639,\n \"stddev_ts\": 0.014713,\n \"samples_ns\": [ 12168911136, 12168491571, 12161344756 ],\n \"samples_ts\": [ 42.0744, 42.0759, 42.1006 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:47:56Z", + "avg_ns": 2402519639, + "stddev_ns": 290163, + "avg_ts": 213.109602, + "stddev_ts": 0.024995, + "samples_ns": [ + 2402650321, + 2402712333, + 2402196265 + ], + "samples_ts": [ + 213.098, + 213.093, + 213.138 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T23:48:05Z", + "avg_ns": 12166249154, + "stddev_ns": 4253941, + "avg_ts": 42.083639, + "stddev_ts": 0.014713, + "samples_ns": [ + 12168911136, + 12168491571, + 12161344756 + ], + "samples_ts": [ + 42.0744, + 42.0759, + 42.1006 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 339 + }, + { + "timestamp_utc": "2025-12-08T23:48:54.445832+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:48:43Z\",\n \"avg_ns\": 577707759,\n \"stddev_ns\": 33137,\n \"avg_ts\": 221.565312,\n \"stddev_ts\": 0.008749,\n \"samples_ns\": [ 577726288, 577714710, 577682280 ],\n \"samples_ts\": [ 221.558, 221.563, 221.575 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:48:45Z\",\n \"avg_ns\": 2965957389,\n \"stddev_ns\": 101519,\n \"avg_ts\": 43.156385,\n \"stddev_ts\": 0.001247,\n \"samples_ns\": [ 2965864878, 2966034002, 2965973288 ],\n \"samples_ts\": [ 43.1577, 43.1553, 43.1562 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:48:43Z", + "avg_ns": 577707759, + "stddev_ns": 33137, + "avg_ts": 221.565312, + "stddev_ts": 0.008749, + "samples_ns": [ + 577726288, + 577714710, + 577682280 + ], + "samples_ts": [ + 221.558, + 221.563, + 221.575 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T23:48:45Z", + "avg_ns": 2965957389, + "stddev_ns": 101519, + "avg_ts": 43.156385, + "stddev_ts": 0.001247, + "samples_ns": [ + 2965864878, + 2966034002, + 2965973288 + ], + "samples_ts": [ + 43.1577, + 43.1553, + 43.1562 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 340 + }, + { + "timestamp_utc": "2025-12-08T23:49:34.236098+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:48:55Z\",\n \"avg_ns\": 577513499,\n \"stddev_ns\": 41543,\n \"avg_ts\": 221.639842,\n \"stddev_ts\": 0.015943,\n \"samples_ns\": [ 577559626, 577479030, 577501841 ],\n \"samples_ts\": [ 221.622, 221.653, 221.644 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:48:57Z\",\n \"avg_ns\": 12190494324,\n \"stddev_ns\": 5004844,\n \"avg_ts\": 41.999942,\n \"stddev_ts\": 0.017242,\n \"samples_ns\": [ 12195815304, 12189786741, 12185880927 ],\n \"samples_ts\": [ 41.9816, 42.0024, 42.0158 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:48:55Z", + "avg_ns": 577513499, + "stddev_ns": 41543, + "avg_ts": 221.639842, + "stddev_ts": 0.015943, + "samples_ns": [ + 577559626, + 577479030, + 577501841 + ], + "samples_ts": [ + 221.622, + 221.653, + 221.644 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T23:48:57Z", + "avg_ns": 12190494324, + "stddev_ns": 5004844, + "avg_ts": 41.999942, + "stddev_ts": 0.017242, + "samples_ns": [ + 12195815304, + 12189786741, + 12185880927 + ], + "samples_ts": [ + 41.9816, + 42.0024, + 42.0158 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 341 + }, + { + "timestamp_utc": "2025-12-08T23:49:53.904089+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:49:35Z\",\n \"avg_ns\": 2464874520,\n \"stddev_ns\": 252243,\n \"avg_ts\": 207.718486,\n \"stddev_ts\": 0.021258,\n \"samples_ns\": [ 2464591914, 2464954773, 2465076873 ],\n \"samples_ts\": [ 207.742, 207.712, 207.701 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:49:44Z\",\n \"avg_ns\": 2971598938,\n \"stddev_ns\": 996314,\n \"avg_ts\": 43.074457,\n \"stddev_ts\": 0.014399,\n \"samples_ns\": [ 2971608642, 2970600795, 2972587379 ],\n \"samples_ts\": [ 43.0743, 43.0889, 43.0601 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:49:35Z", + "avg_ns": 2464874520, + "stddev_ns": 252243, + "avg_ts": 207.718486, + "stddev_ts": 0.021258, + "samples_ns": [ + 2464591914, + 2464954773, + 2465076873 + ], + "samples_ts": [ + 207.742, + 207.712, + 207.701 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T23:49:44Z", + "avg_ns": 2971598938, + "stddev_ns": 996314, + "avg_ts": 43.074457, + "stddev_ts": 0.014399, + "samples_ns": [ + 2971608642, + 2970600795, + 2972587379 + ], + "samples_ts": [ + 43.0743, + 43.0889, + 43.0601 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 342 + }, + { + "timestamp_utc": "2025-12-08T23:50:41.053483+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:49:54Z\",\n \"avg_ns\": 2472644884,\n \"stddev_ns\": 348749,\n \"avg_ts\": 207.065725,\n \"stddev_ts\": 0.029204,\n \"samples_ns\": [ 2472356856, 2473032631, 2472545165 ],\n \"samples_ts\": [ 207.09, 207.033, 207.074 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:50:04Z\",\n \"avg_ns\": 12130005935,\n \"stddev_ns\": 5369899,\n \"avg_ts\": 42.209383,\n \"stddev_ts\": 0.018681,\n \"samples_ns\": [ 12136205866, 12126986346, 12126825593 ],\n \"samples_ts\": [ 42.1878, 42.2199, 42.2204 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:49:54Z", + "avg_ns": 2472644884, + "stddev_ns": 348749, + "avg_ts": 207.065725, + "stddev_ts": 0.029204, + "samples_ns": [ + 2472356856, + 2473032631, + 2472545165 + ], + "samples_ts": [ + 207.09, + 207.033, + 207.074 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T23:50:04Z", + "avg_ns": 12130005935, + "stddev_ns": 5369899, + "avg_ts": 42.209383, + "stddev_ts": 0.018681, + "samples_ns": [ + 12136205866, + 12126986346, + 12126825593 + ], + "samples_ts": [ + 42.1878, + 42.2199, + 42.2204 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 343 + }, + { + "timestamp_utc": "2025-12-08T23:50:53.219523+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:50:41Z\",\n \"avg_ns\": 587168640,\n \"stddev_ns\": 377465,\n \"avg_ts\": 217.995354,\n \"stddev_ts\": 0.140191,\n \"samples_ns\": [ 586734051, 587357130, 587414739 ],\n \"samples_ts\": [ 218.157, 217.925, 217.904 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:50:44Z\",\n \"avg_ns\": 2972659207,\n \"stddev_ns\": 3342974,\n \"avg_ts\": 43.059126,\n \"stddev_ts\": 0.048392,\n \"samples_ns\": [ 2976516418, 2970600512, 2970860691 ],\n \"samples_ts\": [ 43.0033, 43.0889, 43.0852 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:50:41Z", + "avg_ns": 587168640, + "stddev_ns": 377465, + "avg_ts": 217.995354, + "stddev_ts": 0.140191, + "samples_ns": [ + 586734051, + 587357130, + 587414739 + ], + "samples_ts": [ + 218.157, + 217.925, + 217.904 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T23:50:44Z", + "avg_ns": 2972659207, + "stddev_ns": 3342974, + "avg_ts": 43.059126, + "stddev_ts": 0.048392, + "samples_ns": [ + 2976516418, + 2970600512, + 2970860691 + ], + "samples_ts": [ + 43.0033, + 43.0889, + 43.0852 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 344 + }, + { + "timestamp_utc": "2025-12-08T23:51:32.963873+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:50:53Z\",\n \"avg_ns\": 579460858,\n \"stddev_ns\": 211119,\n \"avg_ts\": 220.895008,\n \"stddev_ts\": 0.079411,\n \"samples_ns\": [ 579700550, 579359005, 579323021 ],\n \"samples_ts\": [ 220.804, 220.934, 220.948 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:50:56Z\",\n \"avg_ns\": 12186169533,\n \"stddev_ns\": 1196136,\n \"avg_ts\": 42.014843,\n \"stddev_ts\": 0.004106,\n \"samples_ns\": [ 12186146628, 12184990120, 12187371852 ],\n \"samples_ts\": [ 42.0149, 42.0189, 42.0107 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:50:53Z", + "avg_ns": 579460858, + "stddev_ns": 211119, + "avg_ts": 220.895008, + "stddev_ts": 0.079411, + "samples_ns": [ + 579700550, + 579359005, + 579323021 + ], + "samples_ts": [ + 220.804, + 220.934, + 220.948 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T23:50:56Z", + "avg_ns": 12186169533, + "stddev_ns": 1196136, + "avg_ts": 42.014843, + "stddev_ts": 0.004106, + "samples_ns": [ + 12186146628, + 12184990120, + 12187371852 + ], + "samples_ts": [ + 42.0149, + 42.0189, + 42.0107 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 345 + }, + { + "timestamp_utc": "2025-12-08T23:51:53.406890+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:51:33Z\",\n \"avg_ns\": 2652131477,\n \"stddev_ns\": 10651269,\n \"avg_ts\": 193.054350,\n \"stddev_ts\": 0.777083,\n \"samples_ns\": [ 2658910813, 2657628581, 2639855039 ],\n \"samples_ts\": [ 192.56, 192.653, 193.95 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:51:44Z\",\n \"avg_ns\": 2975464065,\n \"stddev_ns\": 391586,\n \"avg_ts\": 43.018500,\n \"stddev_ts\": 0.005661,\n \"samples_ns\": [ 2975877826, 2975415114, 2975099255 ],\n \"samples_ts\": [ 43.0125, 43.0192, 43.0238 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:51:33Z", + "avg_ns": 2652131477, + "stddev_ns": 10651269, + "avg_ts": 193.05435, + "stddev_ts": 0.777083, + "samples_ns": [ + 2658910813, + 2657628581, + 2639855039 + ], + "samples_ts": [ + 192.56, + 192.653, + 193.95 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T23:51:44Z", + "avg_ns": 2975464065, + "stddev_ns": 391586, + "avg_ts": 43.0185, + "stddev_ts": 0.005661, + "samples_ns": [ + 2975877826, + 2975415114, + 2975099255 + ], + "samples_ts": [ + 43.0125, + 43.0192, + 43.0238 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 346 + }, + { + "timestamp_utc": "2025-12-08T23:52:41.140124+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:51:54Z\",\n \"avg_ns\": 2582465447,\n \"stddev_ns\": 447842,\n \"avg_ts\": 198.260159,\n \"stddev_ts\": 0.034159,\n \"samples_ns\": [ 2582933157, 2582415756, 2582047429 ],\n \"samples_ts\": [ 198.224, 198.264, 198.292 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:52:04Z\",\n \"avg_ns\": 12175754199,\n \"stddev_ns\": 4316891,\n \"avg_ts\": 42.050787,\n \"stddev_ts\": 0.014910,\n \"samples_ns\": [ 12171098723, 12179624810, 12176539064 ],\n \"samples_ts\": [ 42.0669, 42.0374, 42.0481 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:51:54Z", + "avg_ns": 2582465447, + "stddev_ns": 447842, + "avg_ts": 198.260159, + "stddev_ts": 0.034159, + "samples_ns": [ + 2582933157, + 2582415756, + 2582047429 + ], + "samples_ts": [ + 198.224, + 198.264, + 198.292 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T23:52:04Z", + "avg_ns": 12175754199, + "stddev_ns": 4316891, + "avg_ts": 42.050787, + "stddev_ts": 0.01491, + "samples_ns": [ + 12171098723, + 12179624810, + 12176539064 + ], + "samples_ts": [ + 42.0669, + 42.0374, + 42.0481 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 347 + }, + { + "timestamp_utc": "2025-12-08T23:52:53.367534+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:52:41Z\",\n \"avg_ns\": 588463794,\n \"stddev_ns\": 608304,\n \"avg_ts\": 217.515661,\n \"stddev_ts\": 0.224778,\n \"samples_ns\": [ 587931463, 588333094, 589126825 ],\n \"samples_ts\": [ 217.712, 217.564, 217.271 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:52:44Z\",\n \"avg_ns\": 2991839726,\n \"stddev_ns\": 960872,\n \"avg_ts\": 42.783043,\n \"stddev_ts\": 0.013695,\n \"samples_ns\": [ 2990959509, 2991700003, 2992859668 ],\n \"samples_ts\": [ 42.7956, 42.785, 42.7685 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:52:41Z", + "avg_ns": 588463794, + "stddev_ns": 608304, + "avg_ts": 217.515661, + "stddev_ts": 0.224778, + "samples_ns": [ + 587931463, + 588333094, + 589126825 + ], + "samples_ts": [ + 217.712, + 217.564, + 217.271 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T23:52:44Z", + "avg_ns": 2991839726, + "stddev_ns": 960872, + "avg_ts": 42.783043, + "stddev_ts": 0.013695, + "samples_ns": [ + 2990959509, + 2991700003, + 2992859668 + ], + "samples_ts": [ + 42.7956, + 42.785, + 42.7685 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 348 + }, + { + "timestamp_utc": "2025-12-08T23:53:33.324344+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:52:54Z\",\n \"avg_ns\": 585636672,\n \"stddev_ns\": 134952,\n \"avg_ts\": 218.565555,\n \"stddev_ts\": 0.048723,\n \"samples_ns\": [ 585745429, 585491906, 585672683 ],\n \"samples_ts\": [ 218.525, 218.62, 218.552 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:52:56Z\",\n \"avg_ns\": 12247989885,\n \"stddev_ns\": 2213620,\n \"avg_ts\": 41.802779,\n \"stddev_ts\": 0.007546,\n \"samples_ns\": [ 12245583959, 12248453589, 12249932108 ],\n \"samples_ts\": [ 41.811, 41.8012, 41.7962 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:52:54Z", + "avg_ns": 585636672, + "stddev_ns": 134952, + "avg_ts": 218.565555, + "stddev_ts": 0.048723, + "samples_ns": [ + 585745429, + 585491906, + 585672683 + ], + "samples_ts": [ + 218.525, + 218.62, + 218.552 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T23:52:56Z", + "avg_ns": 12247989885, + "stddev_ns": 2213620, + "avg_ts": 41.802779, + "stddev_ts": 0.007546, + "samples_ns": [ + 12245583959, + 12248453589, + 12249932108 + ], + "samples_ts": [ + 41.811, + 41.8012, + 41.7962 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 349 + }, + { + "timestamp_utc": "2025-12-08T23:53:52.942562+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:53:34Z\",\n \"avg_ns\": 2436603237,\n \"stddev_ns\": 86501,\n \"avg_ts\": 210.128589,\n \"stddev_ts\": 0.007460,\n \"samples_ns\": [ 2436519071, 2436598742, 2436691898 ],\n \"samples_ts\": [ 210.136, 210.129, 210.121 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:53:43Z\",\n \"avg_ns\": 2990727516,\n \"stddev_ns\": 1861930,\n \"avg_ts\": 42.798962,\n \"stddev_ts\": 0.026626,\n \"samples_ns\": [ 2989266563, 2992822913, 2990093073 ],\n \"samples_ts\": [ 42.8199, 42.769, 42.808 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:53:34Z", + "avg_ns": 2436603237, + "stddev_ns": 86501, + "avg_ts": 210.128589, + "stddev_ts": 0.00746, + "samples_ns": [ + 2436519071, + 2436598742, + 2436691898 + ], + "samples_ts": [ + 210.136, + 210.129, + 210.121 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T23:53:43Z", + "avg_ns": 2990727516, + "stddev_ns": 1861930, + "avg_ts": 42.798962, + "stddev_ts": 0.026626, + "samples_ns": [ + 2989266563, + 2992822913, + 2990093073 + ], + "samples_ts": [ + 42.8199, + 42.769, + 42.808 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 350 + }, + { + "timestamp_utc": "2025-12-08T23:54:39.780662+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:53:53Z\",\n \"avg_ns\": 2401059162,\n \"stddev_ns\": 312261,\n \"avg_ts\": 213.239229,\n \"stddev_ts\": 0.027041,\n \"samples_ns\": [ 2400736262, 2401341056, 2401100170 ],\n \"samples_ts\": [ 213.268, 213.214, 213.236 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:54:03Z\",\n \"avg_ns\": 12121093340,\n \"stddev_ns\": 4764070,\n \"avg_ts\": 42.240418,\n \"stddev_ts\": 0.016590,\n \"samples_ns\": [ 12119231673, 12126504439, 12117543910 ],\n \"samples_ts\": [ 42.2469, 42.2216, 42.2528 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:53:53Z", + "avg_ns": 2401059162, + "stddev_ns": 312261, + "avg_ts": 213.239229, + "stddev_ts": 0.027041, + "samples_ns": [ + 2400736262, + 2401341056, + 2401100170 + ], + "samples_ts": [ + 213.268, + 213.214, + 213.236 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T23:54:03Z", + "avg_ns": 12121093340, + "stddev_ns": 4764070, + "avg_ts": 42.240418, + "stddev_ts": 0.01659, + "samples_ns": [ + 12119231673, + 12126504439, + 12117543910 + ], + "samples_ts": [ + 42.2469, + 42.2216, + 42.2528 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 351 + }, + { + "timestamp_utc": "2025-12-08T23:54:51.840351+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:54:40Z\",\n \"avg_ns\": 582611159,\n \"stddev_ns\": 4240441,\n \"avg_ts\": 219.708347,\n \"stddev_ts\": 1.604136,\n \"samples_ns\": [ 586013733, 583959169, 577860575 ],\n \"samples_ts\": [ 218.425, 219.193, 221.507 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:54:42Z\",\n \"avg_ns\": 2943557965,\n \"stddev_ns\": 401991,\n \"avg_ts\": 43.484791,\n \"stddev_ts\": 0.005830,\n \"samples_ns\": [ 2943792663, 2943778845, 2943102389 ],\n \"samples_ts\": [ 43.4813, 43.4815, 43.4915 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:54:40Z", + "avg_ns": 582611159, + "stddev_ns": 4240441, + "avg_ts": 219.708347, + "stddev_ts": 1.604136, + "samples_ns": [ + 586013733, + 583959169, + 577860575 + ], + "samples_ts": [ + 218.425, + 219.193, + 221.507 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T23:54:42Z", + "avg_ns": 2943557965, + "stddev_ns": 401991, + "avg_ts": 43.484791, + "stddev_ts": 0.00583, + "samples_ns": [ + 2943792663, + 2943778845, + 2943102389 + ], + "samples_ts": [ + 43.4813, + 43.4815, + 43.4915 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 352 + }, + { + "timestamp_utc": "2025-12-08T23:55:31.274743+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:54:52Z\",\n \"avg_ns\": 575865643,\n \"stddev_ns\": 166882,\n \"avg_ts\": 222.274082,\n \"stddev_ts\": 0.064421,\n \"samples_ns\": [ 575679371, 576001521, 575916037 ],\n \"samples_ts\": [ 222.346, 222.222, 222.255 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:54:54Z\",\n \"avg_ns\": 12082437655,\n \"stddev_ns\": 8883014,\n \"avg_ts\": 42.375570,\n \"stddev_ts\": 0.031141,\n \"samples_ns\": [ 12092289940, 12075045110, 12079977917 ],\n \"samples_ts\": [ 42.341, 42.4015, 42.3842 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:54:52Z", + "avg_ns": 575865643, + "stddev_ns": 166882, + "avg_ts": 222.274082, + "stddev_ts": 0.064421, + "samples_ns": [ + 575679371, + 576001521, + 575916037 + ], + "samples_ts": [ + 222.346, + 222.222, + 222.255 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T23:54:54Z", + "avg_ns": 12082437655, + "stddev_ns": 8883014, + "avg_ts": 42.37557, + "stddev_ts": 0.031141, + "samples_ns": [ + 12092289940, + 12075045110, + 12079977917 + ], + "samples_ts": [ + 42.341, + 42.4015, + 42.3842 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 353 + }, + { + "timestamp_utc": "2025-12-08T23:55:50.849343+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:55:32Z\",\n \"avg_ns\": 2432070269,\n \"stddev_ns\": 726006,\n \"avg_ts\": 210.520245,\n \"stddev_ts\": 0.062544,\n \"samples_ns\": [ 2431817101, 2432885440, 2431508268 ],\n \"samples_ts\": [ 210.542, 210.45, 210.569 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:55:41Z\",\n \"avg_ns\": 2971820617,\n \"stddev_ns\": 521251,\n \"avg_ts\": 43.071241,\n \"stddev_ts\": 0.007555,\n \"samples_ns\": [ 2971271481, 2971881784, 2972308586 ],\n \"samples_ts\": [ 43.0792, 43.0704, 43.0642 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:55:32Z", + "avg_ns": 2432070269, + "stddev_ns": 726006, + "avg_ts": 210.520245, + "stddev_ts": 0.062544, + "samples_ns": [ + 2431817101, + 2432885440, + 2431508268 + ], + "samples_ts": [ + 210.542, + 210.45, + 210.569 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T23:55:41Z", + "avg_ns": 2971820617, + "stddev_ns": 521251, + "avg_ts": 43.071241, + "stddev_ts": 0.007555, + "samples_ns": [ + 2971271481, + 2971881784, + 2972308586 + ], + "samples_ts": [ + 43.0792, + 43.0704, + 43.0642 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 354 + }, + { + "timestamp_utc": "2025-12-08T23:56:37.579988+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:55:51Z\",\n \"avg_ns\": 2400555666,\n \"stddev_ns\": 400988,\n \"avg_ts\": 213.283956,\n \"stddev_ts\": 0.035088,\n \"samples_ns\": [ 2400355675, 2400300707, 2401010618 ],\n \"samples_ts\": [ 213.302, 213.307, 213.244 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:56:01Z\",\n \"avg_ns\": 12088436931,\n \"stddev_ns\": 13638698,\n \"avg_ts\": 42.354561,\n \"stddev_ts\": 0.047813,\n \"samples_ns\": [ 12072783338, 12097751575, 12094775882 ],\n \"samples_ts\": [ 42.4094, 42.3219, 42.3323 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:55:51Z", + "avg_ns": 2400555666, + "stddev_ns": 400988, + "avg_ts": 213.283956, + "stddev_ts": 0.035088, + "samples_ns": [ + 2400355675, + 2400300707, + 2401010618 + ], + "samples_ts": [ + 213.302, + 213.307, + 213.244 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T23:56:01Z", + "avg_ns": 12088436931, + "stddev_ns": 13638698, + "avg_ts": 42.354561, + "stddev_ts": 0.047813, + "samples_ns": [ + 12072783338, + 12097751575, + 12094775882 + ], + "samples_ts": [ + 42.4094, + 42.3219, + 42.3323 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 355 + }, + { + "timestamp_utc": "2025-12-08T23:56:49.618779+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:56:38Z\",\n \"avg_ns\": 575688429,\n \"stddev_ns\": 161309,\n \"avg_ts\": 222.342503,\n \"stddev_ts\": 0.060910,\n \"samples_ns\": [ 575834377, 575709763, 575521149 ],\n \"samples_ts\": [ 222.286, 222.334, 222.407 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:56:40Z\",\n \"avg_ns\": 2949053136,\n \"stddev_ns\": 1194643,\n \"avg_ts\": 43.403767,\n \"stddev_ts\": 0.017542,\n \"samples_ns\": [ 2948538058, 2950416251, 2948205101 ],\n \"samples_ts\": [ 43.4113, 43.3837, 43.4162 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:56:38Z", + "avg_ns": 575688429, + "stddev_ns": 161309, + "avg_ts": 222.342503, + "stddev_ts": 0.06091, + "samples_ns": [ + 575834377, + 575709763, + 575521149 + ], + "samples_ts": [ + 222.286, + 222.334, + 222.407 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T23:56:40Z", + "avg_ns": 2949053136, + "stddev_ns": 1194643, + "avg_ts": 43.403767, + "stddev_ts": 0.017542, + "samples_ns": [ + 2948538058, + 2950416251, + 2948205101 + ], + "samples_ts": [ + 43.4113, + 43.3837, + 43.4162 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 356 + }, + { + "timestamp_utc": "2025-12-08T23:57:29.273530+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:56:50Z\",\n \"avg_ns\": 584592662,\n \"stddev_ns\": 508801,\n \"avg_ts\": 218.955989,\n \"stddev_ts\": 0.190260,\n \"samples_ns\": [ 585178553, 584270671, 584328763 ],\n \"samples_ts\": [ 218.737, 219.077, 219.055 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:56:52Z\",\n \"avg_ns\": 12151118600,\n \"stddev_ns\": 48362591,\n \"avg_ts\": 42.136483,\n \"stddev_ts\": 0.167610,\n \"samples_ns\": [ 12201675492, 12146380762, 12105299548 ],\n \"samples_ts\": [ 41.9615, 42.1525, 42.2955 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:56:50Z", + "avg_ns": 584592662, + "stddev_ns": 508801, + "avg_ts": 218.955989, + "stddev_ts": 0.19026, + "samples_ns": [ + 585178553, + 584270671, + 584328763 + ], + "samples_ts": [ + 218.737, + 219.077, + 219.055 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T23:56:52Z", + "avg_ns": 12151118600, + "stddev_ns": 48362591, + "avg_ts": 42.136483, + "stddev_ts": 0.16761, + "samples_ns": [ + 12201675492, + 12146380762, + 12105299548 + ], + "samples_ts": [ + 41.9615, + 42.1525, + 42.2955 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 357 + }, + { + "timestamp_utc": "2025-12-08T23:57:49.295625+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:57:30Z\",\n \"avg_ns\": 2553113004,\n \"stddev_ns\": 711591,\n \"avg_ts\": 200.539508,\n \"stddev_ts\": 0.055615,\n \"samples_ns\": [ 2553248263, 2552347138, 2553743613 ],\n \"samples_ts\": [ 200.529, 200.6, 200.49 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:57:40Z\",\n \"avg_ns\": 2957901689,\n \"stddev_ns\": 827610,\n \"avg_ts\": 43.273922,\n \"stddev_ts\": 0.012108,\n \"samples_ns\": [ 2957065628, 2958720581, 2957918858 ],\n \"samples_ts\": [ 43.2862, 43.2619, 43.2737 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:57:30Z", + "avg_ns": 2553113004, + "stddev_ns": 711591, + "avg_ts": 200.539508, + "stddev_ts": 0.055615, + "samples_ns": [ + 2553248263, + 2552347138, + 2553743613 + ], + "samples_ts": [ + 200.529, + 200.6, + 200.49 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T23:57:40Z", + "avg_ns": 2957901689, + "stddev_ns": 827610, + "avg_ts": 43.273922, + "stddev_ts": 0.012108, + "samples_ns": [ + 2957065628, + 2958720581, + 2957918858 + ], + "samples_ts": [ + 43.2862, + 43.2619, + 43.2737 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 358 + }, + { + "timestamp_utc": "2025-12-08T23:58:36.575707+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:57:50Z\",\n \"avg_ns\": 2530507997,\n \"stddev_ns\": 66831,\n \"avg_ts\": 202.330916,\n \"stddev_ts\": 0.003518,\n \"samples_ns\": [ 2530463907, 2530551905, 2530508180 ],\n \"samples_ts\": [ 202.334, 202.327, 202.331 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:58:00Z\",\n \"avg_ns\": 12096273509,\n \"stddev_ns\": 11040809,\n \"avg_ts\": 42.327109,\n \"stddev_ts\": 0.038648,\n \"samples_ns\": [ 12083864779, 12099947079, 12105008670 ],\n \"samples_ts\": [ 42.3706, 42.3142, 42.2965 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:57:50Z", + "avg_ns": 2530507997, + "stddev_ns": 66831, + "avg_ts": 202.330916, + "stddev_ts": 0.003518, + "samples_ns": [ + 2530463907, + 2530551905, + 2530508180 + ], + "samples_ts": [ + 202.334, + 202.327, + 202.331 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T23:58:00Z", + "avg_ns": 12096273509, + "stddev_ns": 11040809, + "avg_ts": 42.327109, + "stddev_ts": 0.038648, + "samples_ns": [ + 12083864779, + 12099947079, + 12105008670 + ], + "samples_ts": [ + 42.3706, + 42.3142, + 42.2965 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 359 + }, + { + "timestamp_utc": "2025-12-08T23:58:48.480387+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:58:37Z\",\n \"avg_ns\": 405733229,\n \"stddev_ns\": 283011,\n \"avg_ts\": 315.478330,\n \"stddev_ts\": 0.219971,\n \"samples_ns\": [ 406058443, 405598418, 405542826 ],\n \"samples_ts\": [ 315.226, 315.583, 315.626 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:58:38Z\",\n \"avg_ns\": 3130023348,\n \"stddev_ns\": 187050,\n \"avg_ts\": 40.894264,\n \"stddev_ts\": 0.002444,\n \"samples_ns\": [ 3130208472, 3130027144, 3129834428 ],\n \"samples_ts\": [ 40.8918, 40.8942, 40.8967 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:58:37Z", + "avg_ns": 405733229, + "stddev_ns": 283011, + "avg_ts": 315.47833, + "stddev_ts": 0.219971, + "samples_ns": [ + 406058443, + 405598418, + 405542826 + ], + "samples_ts": [ + 315.226, + 315.583, + 315.626 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T23:58:38Z", + "avg_ns": 3130023348, + "stddev_ns": 187050, + "avg_ts": 40.894264, + "stddev_ts": 0.002444, + "samples_ns": [ + 3130208472, + 3130027144, + 3129834428 + ], + "samples_ts": [ + 40.8918, + 40.8942, + 40.8967 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 360 + }, + { + "timestamp_utc": "2025-12-08T23:59:29.118019+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:58:49Z\",\n \"avg_ns\": 406496025,\n \"stddev_ns\": 56706,\n \"avg_ts\": 314.886231,\n \"stddev_ts\": 0.037968,\n \"samples_ns\": [ 406475600, 406460525, 406551952 ],\n \"samples_ts\": [ 314.902, 314.914, 314.843 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:58:50Z\",\n \"avg_ns\": 12715073933,\n \"stddev_ns\": 4613713,\n \"avg_ts\": 40.267170,\n \"stddev_ts\": 0.014604,\n \"samples_ns\": [ 12720275755, 12713462683, 12711483362 ],\n \"samples_ts\": [ 40.2507, 40.2723, 40.2785 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:58:49Z", + "avg_ns": 406496025, + "stddev_ns": 56706, + "avg_ts": 314.886231, + "stddev_ts": 0.037968, + "samples_ns": [ + 406475600, + 406460525, + 406551952 + ], + "samples_ts": [ + 314.902, + 314.914, + 314.843 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T23:58:50Z", + "avg_ns": 12715073933, + "stddev_ns": 4613713, + "avg_ts": 40.26717, + "stddev_ts": 0.014604, + "samples_ns": [ + 12720275755, + 12713462683, + 12711483362 + ], + "samples_ts": [ + 40.2507, + 40.2723, + 40.2785 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 361 + }, + { + "timestamp_utc": "2025-12-08T23:59:46.054556+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:59:29Z\",\n \"avg_ns\": 1661679100,\n \"stddev_ns\": 387612,\n \"avg_ts\": 308.122079,\n \"stddev_ts\": 0.071868,\n \"samples_ns\": [ 1662110316, 1661359652, 1661567332 ],\n \"samples_ts\": [ 308.042, 308.181, 308.143 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:59:36Z\",\n \"avg_ns\": 3132720895,\n \"stddev_ns\": 1975793,\n \"avg_ts\": 40.859061,\n \"stddev_ts\": 0.025779,\n \"samples_ns\": [ 3130440674, 3133796166, 3133925845 ],\n \"samples_ts\": [ 40.8888, 40.845, 40.8433 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:59:29Z", + "avg_ns": 1661679100, + "stddev_ns": 387612, + "avg_ts": 308.122079, + "stddev_ts": 0.071868, + "samples_ns": [ + 1662110316, + 1661359652, + 1661567332 + ], + "samples_ts": [ + 308.042, + 308.181, + 308.143 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-08T23:59:36Z", + "avg_ns": 3132720895, + "stddev_ns": 1975793, + "avg_ts": 40.859061, + "stddev_ts": 0.025779, + "samples_ns": [ + 3130440674, + 3133796166, + 3133925845 + ], + "samples_ts": [ + 40.8888, + 40.845, + 40.8433 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 362 + }, + { + "timestamp_utc": "2025-12-09T00:00:31.907970+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:59:46Z\",\n \"avg_ns\": 1673298013,\n \"stddev_ns\": 146154,\n \"avg_ts\": 305.982556,\n \"stddev_ts\": 0.025659,\n \"samples_ns\": [ 1673325774, 1673145895, 1673422371 ],\n \"samples_ts\": [ 305.977, 306.01, 305.96 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-08T23:59:53Z\",\n \"avg_ns\": 12765020342,\n \"stddev_ns\": 56769576,\n \"avg_ts\": 40.110140,\n \"stddev_ts\": 0.178586,\n \"samples_ns\": [ 12703893106, 12816086832, 12775081089 ],\n \"samples_ts\": [ 40.3026, 39.9498, 40.078 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-08T23:59:46Z", + "avg_ns": 1673298013, + "stddev_ns": 146154, + "avg_ts": 305.982556, + "stddev_ts": 0.025659, + "samples_ns": [ + 1673325774, + 1673145895, + 1673422371 + ], + "samples_ts": [ + 305.977, + 306.01, + 305.96 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-08T23:59:53Z", + "avg_ns": 12765020342, + "stddev_ns": 56769576, + "avg_ts": 40.11014, + "stddev_ts": 0.178586, + "samples_ns": [ + 12703893106, + 12816086832, + 12775081089 + ], + "samples_ts": [ + 40.3026, + 39.9498, + 40.078 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 363 + }, + { + "timestamp_utc": "2025-12-09T00:00:43.822277+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:00:32Z\",\n \"avg_ns\": 408239897,\n \"stddev_ns\": 294696,\n \"avg_ts\": 313.541242,\n \"stddev_ts\": 0.226394,\n \"samples_ns\": [ 408490900, 407915412, 408313379 ],\n \"samples_ts\": [ 313.348, 313.791, 313.485 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:00:34Z\",\n \"avg_ns\": 3128078430,\n \"stddev_ns\": 268103,\n \"avg_ts\": 40.919690,\n \"stddev_ts\": 0.003430,\n \"samples_ns\": [ 3127891155, 3127966040, 3128378096 ],\n \"samples_ts\": [ 40.9221, 40.9212, 40.9158 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:00:32Z", + "avg_ns": 408239897, + "stddev_ns": 294696, + "avg_ts": 313.541242, + "stddev_ts": 0.226394, + "samples_ns": [ + 408490900, + 407915412, + 408313379 + ], + "samples_ts": [ + 313.348, + 313.791, + 313.485 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T00:00:34Z", + "avg_ns": 3128078430, + "stddev_ns": 268103, + "avg_ts": 40.91969, + "stddev_ts": 0.00343, + "samples_ns": [ + 3127891155, + 3127966040, + 3128378096 + ], + "samples_ts": [ + 40.9221, + 40.9212, + 40.9158 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 364 + }, + { + "timestamp_utc": "2025-12-09T00:01:25.200392+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:00:44Z\",\n \"avg_ns\": 407090620,\n \"stddev_ns\": 278175,\n \"avg_ts\": 314.426404,\n \"stddev_ts\": 0.214877,\n \"samples_ns\": [ 407354506, 406800074, 407117280 ],\n \"samples_ts\": [ 314.223, 314.651, 314.406 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:00:46Z\",\n \"avg_ns\": 12945815214,\n \"stddev_ns\": 7339649,\n \"avg_ts\": 39.549468,\n \"stddev_ts\": 0.022429,\n \"samples_ns\": [ 12937476369, 12951295437, 12948673836 ],\n \"samples_ts\": [ 39.575, 39.5327, 39.5407 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:00:44Z", + "avg_ns": 407090620, + "stddev_ns": 278175, + "avg_ts": 314.426404, + "stddev_ts": 0.214877, + "samples_ns": [ + 407354506, + 406800074, + 407117280 + ], + "samples_ts": [ + 314.223, + 314.651, + 314.406 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T00:00:46Z", + "avg_ns": 12945815214, + "stddev_ns": 7339649, + "avg_ts": 39.549468, + "stddev_ts": 0.022429, + "samples_ns": [ + 12937476369, + 12951295437, + 12948673836 + ], + "samples_ts": [ + 39.575, + 39.5327, + 39.5407 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 365 + }, + { + "timestamp_utc": "2025-12-09T00:01:42.255098+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:01:25Z\",\n \"avg_ns\": 1696275893,\n \"stddev_ns\": 1212548,\n \"avg_ts\": 301.837795,\n \"stddev_ts\": 0.215839,\n \"samples_ns\": [ 1696745620, 1697183304, 1694898755 ],\n \"samples_ts\": [ 301.754, 301.676, 302.083 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:01:32Z\",\n \"avg_ns\": 3126014402,\n \"stddev_ns\": 1625068,\n \"avg_ts\": 40.946716,\n \"stddev_ts\": 0.021280,\n \"samples_ns\": [ 3127032977, 3124141443, 3126868787 ],\n \"samples_ts\": [ 40.9334, 40.9713, 40.9355 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:01:25Z", + "avg_ns": 1696275893, + "stddev_ns": 1212548, + "avg_ts": 301.837795, + "stddev_ts": 0.215839, + "samples_ns": [ + 1696745620, + 1697183304, + 1694898755 + ], + "samples_ts": [ + 301.754, + 301.676, + 302.083 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T00:01:32Z", + "avg_ns": 3126014402, + "stddev_ns": 1625068, + "avg_ts": 40.946716, + "stddev_ts": 0.02128, + "samples_ns": [ + 3127032977, + 3124141443, + 3126868787 + ], + "samples_ts": [ + 40.9334, + 40.9713, + 40.9355 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 366 + }, + { + "timestamp_utc": "2025-12-09T00:02:28.501417+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:01:43Z\",\n \"avg_ns\": 1734076496,\n \"stddev_ns\": 918426,\n \"avg_ts\": 295.258079,\n \"stddev_ts\": 0.156022,\n \"samples_ns\": [ 1735102699, 1733787494, 1733339297 ],\n \"samples_ts\": [ 295.083, 295.307, 295.384 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:01:49Z\",\n \"avg_ns\": 12814067225,\n \"stddev_ns\": 6095156,\n \"avg_ts\": 39.956094,\n \"stddev_ts\": 0.019002,\n \"samples_ns\": [ 12807418442, 12819384244, 12815398991 ],\n \"samples_ts\": [ 39.9768, 39.9395, 39.9519 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:01:43Z", + "avg_ns": 1734076496, + "stddev_ns": 918426, + "avg_ts": 295.258079, + "stddev_ts": 0.156022, + "samples_ns": [ + 1735102699, + 1733787494, + 1733339297 + ], + "samples_ts": [ + 295.083, + 295.307, + 295.384 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T00:01:49Z", + "avg_ns": 12814067225, + "stddev_ns": 6095156, + "avg_ts": 39.956094, + "stddev_ts": 0.019002, + "samples_ns": [ + 12807418442, + 12819384244, + 12815398991 + ], + "samples_ts": [ + 39.9768, + 39.9395, + 39.9519 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 367 + }, + { + "timestamp_utc": "2025-12-09T00:02:40.435614+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:02:29Z\",\n \"avg_ns\": 406507184,\n \"stddev_ns\": 168855,\n \"avg_ts\": 314.877619,\n \"stddev_ts\": 0.129836,\n \"samples_ns\": [ 406694983, 406372609, 406453961 ],\n \"samples_ts\": [ 314.732, 314.982, 314.919 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:02:30Z\",\n \"avg_ns\": 3136658909,\n \"stddev_ns\": 4899331,\n \"avg_ts\": 40.807819,\n \"stddev_ts\": 0.063680,\n \"samples_ns\": [ 3133528351, 3142304631, 3134143746 ],\n \"samples_ts\": [ 40.8485, 40.7344, 40.8405 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:02:29Z", + "avg_ns": 406507184, + "stddev_ns": 168855, + "avg_ts": 314.877619, + "stddev_ts": 0.129836, + "samples_ns": [ + 406694983, + 406372609, + 406453961 + ], + "samples_ts": [ + 314.732, + 314.982, + 314.919 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T00:02:30Z", + "avg_ns": 3136658909, + "stddev_ns": 4899331, + "avg_ts": 40.807819, + "stddev_ts": 0.06368, + "samples_ns": [ + 3133528351, + 3142304631, + 3134143746 + ], + "samples_ts": [ + 40.8485, + 40.7344, + 40.8405 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 368 + }, + { + "timestamp_utc": "2025-12-09T00:03:21.348175+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:02:41Z\",\n \"avg_ns\": 408415165,\n \"stddev_ns\": 76220,\n \"avg_ts\": 313.406586,\n \"stddev_ts\": 0.056399,\n \"samples_ns\": [ 408480271, 408429752, 408335473 ],\n \"samples_ts\": [ 313.357, 313.395, 313.468 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:02:42Z\",\n \"avg_ns\": 12787668091,\n \"stddev_ns\": 11106882,\n \"avg_ts\": 40.038595,\n \"stddev_ts\": 0.034773,\n \"samples_ns\": [ 12776475806, 12798685196, 12787843273 ],\n \"samples_ts\": [ 40.0736, 40.0041, 40.038 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:02:41Z", + "avg_ns": 408415165, + "stddev_ns": 76220, + "avg_ts": 313.406586, + "stddev_ts": 0.056399, + "samples_ns": [ + 408480271, + 408429752, + 408335473 + ], + "samples_ts": [ + 313.357, + 313.395, + 313.468 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T00:02:42Z", + "avg_ns": 12787668091, + "stddev_ns": 11106882, + "avg_ts": 40.038595, + "stddev_ts": 0.034773, + "samples_ns": [ + 12776475806, + 12798685196, + 12787843273 + ], + "samples_ts": [ + 40.0736, + 40.0041, + 40.038 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 369 + }, + { + "timestamp_utc": "2025-12-09T00:03:39.218792+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:03:22Z\",\n \"avg_ns\": 1876157217,\n \"stddev_ns\": 431244,\n \"avg_ts\": 272.898248,\n \"stddev_ts\": 0.062095,\n \"samples_ns\": [ 1876538222, 1875695891, 1876237540 ],\n \"samples_ts\": [ 272.843, 272.965, 272.887 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:03:29Z\",\n \"avg_ns\": 3156865922,\n \"stddev_ns\": 996438,\n \"avg_ts\": 40.546546,\n \"stddev_ts\": 0.012755,\n \"samples_ns\": [ 3156254544, 3156331232, 3158011992 ],\n \"samples_ts\": [ 40.5544, 40.5534, 40.5318 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:03:22Z", + "avg_ns": 1876157217, + "stddev_ns": 431244, + "avg_ts": 272.898248, + "stddev_ts": 0.062095, + "samples_ns": [ + 1876538222, + 1875695891, + 1876237540 + ], + "samples_ts": [ + 272.843, + 272.965, + 272.887 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T00:03:29Z", + "avg_ns": 3156865922, + "stddev_ns": 996438, + "avg_ts": 40.546546, + "stddev_ts": 0.012755, + "samples_ns": [ + 3156254544, + 3156331232, + 3158011992 + ], + "samples_ts": [ + 40.5544, + 40.5534, + 40.5318 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 370 + }, + { + "timestamp_utc": "2025-12-09T00:04:26.280465+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:03:39Z\",\n \"avg_ns\": 1881251407,\n \"stddev_ns\": 1220823,\n \"avg_ts\": 272.159341,\n \"stddev_ts\": 0.176328,\n \"samples_ns\": [ 1880644080, 1882655077, 1880455066 ],\n \"samples_ts\": [ 272.247, 271.956, 272.275 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:03:47Z\",\n \"avg_ns\": 12888183751,\n \"stddev_ns\": 1138983,\n \"avg_ts\": 39.726312,\n \"stddev_ts\": 0.003511,\n \"samples_ns\": [ 12887814928, 12889461442, 12887274883 ],\n \"samples_ts\": [ 39.7274, 39.7224, 39.7291 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:03:39Z", + "avg_ns": 1881251407, + "stddev_ns": 1220823, + "avg_ts": 272.159341, + "stddev_ts": 0.176328, + "samples_ns": [ + 1880644080, + 1882655077, + 1880455066 + ], + "samples_ts": [ + 272.247, + 271.956, + 272.275 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T00:03:47Z", + "avg_ns": 12888183751, + "stddev_ns": 1138983, + "avg_ts": 39.726312, + "stddev_ts": 0.003511, + "samples_ns": [ + 12887814928, + 12889461442, + 12887274883 + ], + "samples_ts": [ + 39.7274, + 39.7224, + 39.7291 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 371 + }, + { + "timestamp_utc": "2025-12-09T00:04:38.310903+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:04:27Z\",\n \"avg_ns\": 407179240,\n \"stddev_ns\": 261019,\n \"avg_ts\": 314.357960,\n \"stddev_ts\": 0.201451,\n \"samples_ns\": [ 406988387, 407072646, 407476687 ],\n \"samples_ts\": [ 314.505, 314.44, 314.128 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:04:28Z\",\n \"avg_ns\": 3163493624,\n \"stddev_ns\": 1567511,\n \"avg_ts\": 40.461602,\n \"stddev_ts\": 0.020028,\n \"samples_ns\": [ 3164209407, 3161698198, 3164573269 ],\n \"samples_ts\": [ 40.4524, 40.4846, 40.4478 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:04:27Z", + "avg_ns": 407179240, + "stddev_ns": 261019, + "avg_ts": 314.35796, + "stddev_ts": 0.201451, + "samples_ns": [ + 406988387, + 407072646, + 407476687 + ], + "samples_ts": [ + 314.505, + 314.44, + 314.128 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T00:04:28Z", + "avg_ns": 3163493624, + "stddev_ns": 1567511, + "avg_ts": 40.461602, + "stddev_ts": 0.020028, + "samples_ns": [ + 3164209407, + 3161698198, + 3164573269 + ], + "samples_ts": [ + 40.4524, + 40.4846, + 40.4478 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 372 + }, + { + "timestamp_utc": "2025-12-09T00:05:18.917380+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:04:39Z\",\n \"avg_ns\": 402848669,\n \"stddev_ns\": 274344,\n \"avg_ts\": 317.737277,\n \"stddev_ts\": 0.215868,\n \"samples_ns\": [ 402541222, 403065394, 402939392 ],\n \"samples_ts\": [ 317.98, 317.566, 317.666 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:04:40Z\",\n \"avg_ns\": 12710945433,\n \"stddev_ns\": 8411317,\n \"avg_ts\": 40.280257,\n \"stddev_ts\": 0.026650,\n \"samples_ns\": [ 12710902792, 12702557029, 12719376480 ],\n \"samples_ts\": [ 40.2804, 40.3068, 40.2535 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:04:39Z", + "avg_ns": 402848669, + "stddev_ns": 274344, + "avg_ts": 317.737277, + "stddev_ts": 0.215868, + "samples_ns": [ + 402541222, + 403065394, + 402939392 + ], + "samples_ts": [ + 317.98, + 317.566, + 317.666 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T00:04:40Z", + "avg_ns": 12710945433, + "stddev_ns": 8411317, + "avg_ts": 40.280257, + "stddev_ts": 0.02665, + "samples_ns": [ + 12710902792, + 12702557029, + 12719376480 + ], + "samples_ts": [ + 40.2804, + 40.3068, + 40.2535 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 373 + }, + { + "timestamp_utc": "2025-12-09T00:05:36.090357+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:05:19Z\",\n \"avg_ns\": 1689626188,\n \"stddev_ns\": 551486,\n \"avg_ts\": 303.025628,\n \"stddev_ts\": 0.098643,\n \"samples_ns\": [ 1690088092, 1689017836, 1689772637 ],\n \"samples_ts\": [ 302.943, 303.135, 302.999 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:05:26Z\",\n \"avg_ns\": 3171969573,\n \"stddev_ns\": 1013621,\n \"avg_ts\": 40.353479,\n \"stddev_ts\": 0.012856,\n \"samples_ns\": [ 3172893220, 3172125199, 3170890302 ],\n \"samples_ts\": [ 40.3417, 40.3515, 40.3672 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:05:19Z", + "avg_ns": 1689626188, + "stddev_ns": 551486, + "avg_ts": 303.025628, + "stddev_ts": 0.098643, + "samples_ns": [ + 1690088092, + 1689017836, + 1689772637 + ], + "samples_ts": [ + 302.943, + 303.135, + 302.999 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T00:05:26Z", + "avg_ns": 3171969573, + "stddev_ns": 1013621, + "avg_ts": 40.353479, + "stddev_ts": 0.012856, + "samples_ns": [ + 3172893220, + 3172125199, + 3170890302 + ], + "samples_ts": [ + 40.3417, + 40.3515, + 40.3672 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 374 + }, + { + "timestamp_utc": "2025-12-09T00:06:21.640266+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:05:36Z\",\n \"avg_ns\": 1661510713,\n \"stddev_ns\": 722856,\n \"avg_ts\": 308.153333,\n \"stddev_ts\": 0.133832,\n \"samples_ns\": [ 1661333456, 1660894151, 1662304533 ],\n \"samples_ts\": [ 308.186, 308.268, 308.006 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:05:43Z\",\n \"avg_ns\": 12678265277,\n \"stddev_ns\": 2200020,\n \"avg_ts\": 40.384075,\n \"stddev_ts\": 0.007007,\n \"samples_ns\": [ 12677051516, 12676939496, 12680804819 ],\n \"samples_ts\": [ 40.3879, 40.3883, 40.376 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:05:36Z", + "avg_ns": 1661510713, + "stddev_ns": 722856, + "avg_ts": 308.153333, + "stddev_ts": 0.133832, + "samples_ns": [ + 1661333456, + 1660894151, + 1662304533 + ], + "samples_ts": [ + 308.186, + 308.268, + 308.006 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T00:05:43Z", + "avg_ns": 12678265277, + "stddev_ns": 2200020, + "avg_ts": 40.384075, + "stddev_ts": 0.007007, + "samples_ns": [ + 12677051516, + 12676939496, + 12680804819 + ], + "samples_ts": [ + 40.3879, + 40.3883, + 40.376 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 375 + }, + { + "timestamp_utc": "2025-12-09T00:06:33.542328+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:06:22Z\",\n \"avg_ns\": 406090110,\n \"stddev_ns\": 404060,\n \"avg_ts\": 315.201186,\n \"stddev_ts\": 0.313627,\n \"samples_ns\": [ 406493893, 405685772, 406090665 ],\n \"samples_ts\": [ 314.888, 315.515, 315.201 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:06:24Z\",\n \"avg_ns\": 3127886995,\n \"stddev_ns\": 679169,\n \"avg_ts\": 40.922196,\n \"stddev_ts\": 0.008825,\n \"samples_ns\": [ 3127243550, 3128588849, 3127828588 ],\n \"samples_ts\": [ 40.9306, 40.913, 40.923 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:06:22Z", + "avg_ns": 406090110, + "stddev_ns": 404060, + "avg_ts": 315.201186, + "stddev_ts": 0.313627, + "samples_ns": [ + 406493893, + 405685772, + 406090665 + ], + "samples_ts": [ + 314.888, + 315.515, + 315.201 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T00:06:24Z", + "avg_ns": 3127886995, + "stddev_ns": 679169, + "avg_ts": 40.922196, + "stddev_ts": 0.008825, + "samples_ns": [ + 3127243550, + 3128588849, + 3127828588 + ], + "samples_ts": [ + 40.9306, + 40.913, + 40.923 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 376 + }, + { + "timestamp_utc": "2025-12-09T00:07:14.297163+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:06:34Z\",\n \"avg_ns\": 406249643,\n \"stddev_ns\": 183033,\n \"avg_ts\": 315.077242,\n \"stddev_ts\": 0.141083,\n \"samples_ns\": [ 406440349, 406078012, 406230569 ],\n \"samples_ts\": [ 314.929, 315.21, 315.092 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:06:35Z\",\n \"avg_ns\": 12753596509,\n \"stddev_ns\": 3284529,\n \"avg_ts\": 40.145540,\n \"stddev_ts\": 0.010328,\n \"samples_ns\": [ 12750025965, 12756477699, 12754285865 ],\n \"samples_ts\": [ 40.1568, 40.1365, 40.1434 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:06:34Z", + "avg_ns": 406249643, + "stddev_ns": 183033, + "avg_ts": 315.077242, + "stddev_ts": 0.141083, + "samples_ns": [ + 406440349, + 406078012, + 406230569 + ], + "samples_ts": [ + 314.929, + 315.21, + 315.092 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T00:06:35Z", + "avg_ns": 12753596509, + "stddev_ns": 3284529, + "avg_ts": 40.14554, + "stddev_ts": 0.010328, + "samples_ns": [ + 12750025965, + 12756477699, + 12754285865 + ], + "samples_ts": [ + 40.1568, + 40.1365, + 40.1434 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 377 + }, + { + "timestamp_utc": "2025-12-09T00:07:31.463378+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:07:15Z\",\n \"avg_ns\": 1696558698,\n \"stddev_ns\": 548287,\n \"avg_ts\": 301.787398,\n \"stddev_ts\": 0.097546,\n \"samples_ns\": [ 1696768888, 1695936414, 1696970792 ],\n \"samples_ts\": [ 301.75, 301.898, 301.714 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:07:21Z\",\n \"avg_ns\": 3161699723,\n \"stddev_ns\": 1217876,\n \"avg_ts\": 40.484557,\n \"stddev_ts\": 0.015591,\n \"samples_ns\": [ 3161094215, 3160903276, 3163101678 ],\n \"samples_ts\": [ 40.4923, 40.4948, 40.4666 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:07:15Z", + "avg_ns": 1696558698, + "stddev_ns": 548287, + "avg_ts": 301.787398, + "stddev_ts": 0.097546, + "samples_ns": [ + 1696768888, + 1695936414, + 1696970792 + ], + "samples_ts": [ + 301.75, + 301.898, + 301.714 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T00:07:21Z", + "avg_ns": 3161699723, + "stddev_ns": 1217876, + "avg_ts": 40.484557, + "stddev_ts": 0.015591, + "samples_ns": [ + 3161094215, + 3160903276, + 3163101678 + ], + "samples_ts": [ + 40.4923, + 40.4948, + 40.4666 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 378 + }, + { + "timestamp_utc": "2025-12-09T00:08:17.491213+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:07:32Z\",\n \"avg_ns\": 1696019914,\n \"stddev_ns\": 740207,\n \"avg_ts\": 301.883286,\n \"stddev_ts\": 0.131530,\n \"samples_ns\": [ 1695387324, 1695840141, 1696832278 ],\n \"samples_ts\": [ 301.996, 301.915, 301.739 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:07:39Z\",\n \"avg_ns\": 12790883250,\n \"stddev_ns\": 9623884,\n \"avg_ts\": 40.028525,\n \"stddev_ts\": 0.030130,\n \"samples_ns\": [ 12795833860, 12797024078, 12779791812 ],\n \"samples_ts\": [ 40.013, 40.0093, 40.0633 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:07:32Z", + "avg_ns": 1696019914, + "stddev_ns": 740207, + "avg_ts": 301.883286, + "stddev_ts": 0.13153, + "samples_ns": [ + 1695387324, + 1695840141, + 1696832278 + ], + "samples_ts": [ + 301.996, + 301.915, + 301.739 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T00:07:39Z", + "avg_ns": 12790883250, + "stddev_ns": 9623884, + "avg_ts": 40.028525, + "stddev_ts": 0.03013, + "samples_ns": [ + 12795833860, + 12797024078, + 12779791812 + ], + "samples_ts": [ + 40.013, + 40.0093, + 40.0633 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 379 + }, + { + "timestamp_utc": "2025-12-09T00:08:29.495896+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:08:18Z\",\n \"avg_ns\": 406744079,\n \"stddev_ns\": 69763,\n \"avg_ts\": 314.694200,\n \"stddev_ts\": 0.053972,\n \"samples_ns\": [ 406819938, 406682677, 406729622 ],\n \"samples_ts\": [ 314.636, 314.742, 314.705 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:08:19Z\",\n \"avg_ns\": 3160749220,\n \"stddev_ns\": 1118173,\n \"avg_ts\": 40.496731,\n \"stddev_ts\": 0.014311,\n \"samples_ns\": [ 3161154105, 3161607062, 3159486494 ],\n \"samples_ts\": [ 40.4915, 40.4857, 40.5129 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:08:18Z", + "avg_ns": 406744079, + "stddev_ns": 69763, + "avg_ts": 314.6942, + "stddev_ts": 0.053972, + "samples_ns": [ + 406819938, + 406682677, + 406729622 + ], + "samples_ts": [ + 314.636, + 314.742, + 314.705 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T00:08:19Z", + "avg_ns": 3160749220, + "stddev_ns": 1118173, + "avg_ts": 40.496731, + "stddev_ts": 0.014311, + "samples_ns": [ + 3161154105, + 3161607062, + 3159486494 + ], + "samples_ts": [ + 40.4915, + 40.4857, + 40.5129 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 380 + }, + { + "timestamp_utc": "2025-12-09T00:09:10.429220+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:08:30Z\",\n \"avg_ns\": 408026618,\n \"stddev_ns\": 469621,\n \"avg_ts\": 313.705300,\n \"stddev_ts\": 0.360492,\n \"samples_ns\": [ 408567359, 407785165, 407727331 ],\n \"samples_ts\": [ 313.29, 313.891, 313.935 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:08:31Z\",\n \"avg_ns\": 12809842705,\n \"stddev_ns\": 13047020,\n \"avg_ts\": 39.969293,\n \"stddev_ts\": 0.040717,\n \"samples_ns\": [ 12795904667, 12811860941, 12821762508 ],\n \"samples_ts\": [ 40.0128, 39.963, 39.9321 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:08:30Z", + "avg_ns": 408026618, + "stddev_ns": 469621, + "avg_ts": 313.7053, + "stddev_ts": 0.360492, + "samples_ns": [ + 408567359, + 407785165, + 407727331 + ], + "samples_ts": [ + 313.29, + 313.891, + 313.935 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T00:08:31Z", + "avg_ns": 12809842705, + "stddev_ns": 13047020, + "avg_ts": 39.969293, + "stddev_ts": 0.040717, + "samples_ns": [ + 12795904667, + 12811860941, + 12821762508 + ], + "samples_ts": [ + 40.0128, + 39.963, + 39.9321 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 381 + }, + { + "timestamp_utc": "2025-12-09T00:09:28.336429+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:09:11Z\",\n \"avg_ns\": 1880303819,\n \"stddev_ns\": 2156657,\n \"avg_ts\": 272.296659,\n \"stddev_ts\": 0.312331,\n \"samples_ns\": [ 1880916348, 1877908054, 1882087057 ],\n \"samples_ts\": [ 272.208, 272.644, 272.038 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:09:18Z\",\n \"avg_ns\": 3155975042,\n \"stddev_ns\": 1579620,\n \"avg_ts\": 40.557995,\n \"stddev_ts\": 0.020272,\n \"samples_ns\": [ 3155771308, 3157644634, 3154509186 ],\n \"samples_ts\": [ 40.5606, 40.5365, 40.5768 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:09:11Z", + "avg_ns": 1880303819, + "stddev_ns": 2156657, + "avg_ts": 272.296659, + "stddev_ts": 0.312331, + "samples_ns": [ + 1880916348, + 1877908054, + 1882087057 + ], + "samples_ts": [ + 272.208, + 272.644, + 272.038 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T00:09:18Z", + "avg_ns": 3155975042, + "stddev_ns": 1579620, + "avg_ts": 40.557995, + "stddev_ts": 0.020272, + "samples_ns": [ + 3155771308, + 3157644634, + 3154509186 + ], + "samples_ts": [ + 40.5606, + 40.5365, + 40.5768 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 382 + }, + { + "timestamp_utc": "2025-12-09T00:10:15.372087+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:09:29Z\",\n \"avg_ns\": 1881396919,\n \"stddev_ns\": 1188577,\n \"avg_ts\": 272.138288,\n \"stddev_ts\": 0.171752,\n \"samples_ns\": [ 1880870466, 1882756946, 1880563346 ],\n \"samples_ts\": [ 272.214, 271.942, 272.259 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:09:36Z\",\n \"avg_ns\": 12880979133,\n \"stddev_ns\": 7043717,\n \"avg_ts\": 39.748539,\n \"stddev_ts\": 0.021729,\n \"samples_ns\": [ 12874985704, 12879215577, 12888736119 ],\n \"samples_ts\": [ 39.767, 39.754, 39.7246 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:09:29Z", + "avg_ns": 1881396919, + "stddev_ns": 1188577, + "avg_ts": 272.138288, + "stddev_ts": 0.171752, + "samples_ns": [ + 1880870466, + 1882756946, + 1880563346 + ], + "samples_ts": [ + 272.214, + 271.942, + 272.259 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T00:09:36Z", + "avg_ns": 12880979133, + "stddev_ns": 7043717, + "avg_ts": 39.748539, + "stddev_ts": 0.021729, + "samples_ns": [ + 12874985704, + 12879215577, + 12888736119 + ], + "samples_ts": [ + 39.767, + 39.754, + 39.7246 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 383 + }, + { + "timestamp_utc": "2025-12-09T00:10:27.269276+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:10:16Z\",\n \"avg_ns\": 406382910,\n \"stddev_ns\": 536572,\n \"avg_ts\": 314.974240,\n \"stddev_ts\": 0.414996,\n \"samples_ns\": [ 406012962, 406997362, 406138408 ],\n \"samples_ts\": [ 315.261, 314.498, 315.163 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:10:17Z\",\n \"avg_ns\": 3125886796,\n \"stddev_ns\": 502172,\n \"avg_ts\": 40.948381,\n \"stddev_ts\": 0.006496,\n \"samples_ns\": [ 3126458545, 3125573472, 3125628373 ],\n \"samples_ts\": [ 40.9409, 40.9525, 40.9518 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:10:16Z", + "avg_ns": 406382910, + "stddev_ns": 536572, + "avg_ts": 314.97424, + "stddev_ts": 0.414996, + "samples_ns": [ + 406012962, + 406997362, + 406138408 + ], + "samples_ts": [ + 315.261, + 314.498, + 315.163 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T00:10:17Z", + "avg_ns": 3125886796, + "stddev_ns": 502172, + "avg_ts": 40.948381, + "stddev_ts": 0.006496, + "samples_ns": [ + 3126458545, + 3125573472, + 3125628373 + ], + "samples_ts": [ + 40.9409, + 40.9525, + 40.9518 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 384 + }, + { + "timestamp_utc": "2025-12-09T00:11:08.486218+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:10:28Z\",\n \"avg_ns\": 406650866,\n \"stddev_ns\": 490526,\n \"avg_ts\": 314.766633,\n \"stddev_ts\": 0.379253,\n \"samples_ns\": [ 406564668, 407178359, 406209572 ],\n \"samples_ts\": [ 314.833, 314.359, 315.108 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:10:29Z\",\n \"avg_ns\": 12899122155,\n \"stddev_ns\": 3508375,\n \"avg_ts\": 39.692626,\n \"stddev_ts\": 0.010796,\n \"samples_ns\": [ 12895631702, 12899086580, 12902648183 ],\n \"samples_ts\": [ 39.7034, 39.6927, 39.6818 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:10:28Z", + "avg_ns": 406650866, + "stddev_ns": 490526, + "avg_ts": 314.766633, + "stddev_ts": 0.379253, + "samples_ns": [ + 406564668, + 407178359, + 406209572 + ], + "samples_ts": [ + 314.833, + 314.359, + 315.108 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T00:10:29Z", + "avg_ns": 12899122155, + "stddev_ns": 3508375, + "avg_ts": 39.692626, + "stddev_ts": 0.010796, + "samples_ns": [ + 12895631702, + 12899086580, + 12902648183 + ], + "samples_ts": [ + 39.7034, + 39.6927, + 39.6818 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 385 + }, + { + "timestamp_utc": "2025-12-09T00:11:25.634331+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:11:09Z\",\n \"avg_ns\": 1689908195,\n \"stddev_ns\": 350456,\n \"avg_ts\": 302.975047,\n \"stddev_ts\": 0.062837,\n \"samples_ns\": [ 1690020476, 1689515358, 1690188751 ],\n \"samples_ts\": [ 302.955, 303.045, 302.925 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:11:16Z\",\n \"avg_ns\": 3163760953,\n \"stddev_ns\": 1998077,\n \"avg_ts\": 40.458188,\n \"stddev_ts\": 0.025544,\n \"samples_ns\": [ 3163989555, 3165634102, 3161659203 ],\n \"samples_ts\": [ 40.4553, 40.4342, 40.4851 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:11:09Z", + "avg_ns": 1689908195, + "stddev_ns": 350456, + "avg_ts": 302.975047, + "stddev_ts": 0.062837, + "samples_ns": [ + 1690020476, + 1689515358, + 1690188751 + ], + "samples_ts": [ + 302.955, + 303.045, + 302.925 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T00:11:16Z", + "avg_ns": 3163760953, + "stddev_ns": 1998077, + "avg_ts": 40.458188, + "stddev_ts": 0.025544, + "samples_ns": [ + 3163989555, + 3165634102, + 3161659203 + ], + "samples_ts": [ + 40.4553, + 40.4342, + 40.4851 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 386 + }, + { + "timestamp_utc": "2025-12-09T00:12:12.025222+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:11:26Z\",\n \"avg_ns\": 1691047656,\n \"stddev_ns\": 323954,\n \"avg_ts\": 302.770895,\n \"stddev_ts\": 0.057537,\n \"samples_ns\": [ 1690686963, 1691152633, 1691303373 ],\n \"samples_ts\": [ 302.835, 302.752, 302.725 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:11:33Z\",\n \"avg_ns\": 12907942385,\n \"stddev_ns\": 8617794,\n \"avg_ts\": 39.665513,\n \"stddev_ts\": 0.026474,\n \"samples_ns\": [ 12917079837, 12906782589, 12899964731 ],\n \"samples_ts\": [ 39.6374, 39.6691, 39.69 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:11:26Z", + "avg_ns": 1691047656, + "stddev_ns": 323954, + "avg_ts": 302.770895, + "stddev_ts": 0.057537, + "samples_ns": [ + 1690686963, + 1691152633, + 1691303373 + ], + "samples_ts": [ + 302.835, + 302.752, + 302.725 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T00:11:33Z", + "avg_ns": 12907942385, + "stddev_ns": 8617794, + "avg_ts": 39.665513, + "stddev_ts": 0.026474, + "samples_ns": [ + 12917079837, + 12906782589, + 12899964731 + ], + "samples_ts": [ + 39.6374, + 39.6691, + 39.69 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 387 + }, + { + "timestamp_utc": "2025-12-09T00:12:24.071817+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:12:12Z\",\n \"avg_ns\": 406736616,\n \"stddev_ns\": 161135,\n \"avg_ts\": 314.700000,\n \"stddev_ts\": 0.123666,\n \"samples_ns\": [ 406920143, 406627640, 406662066 ],\n \"samples_ts\": [ 314.558, 314.784, 314.758 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:12:14Z\",\n \"avg_ns\": 3174619409,\n \"stddev_ns\": 46593,\n \"avg_ts\": 40.319794,\n \"stddev_ts\": 0.000592,\n \"samples_ns\": [ 3174610193, 3174578112, 3174669922 ],\n \"samples_ts\": [ 40.3199, 40.3203, 40.3192 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:12:12Z", + "avg_ns": 406736616, + "stddev_ns": 161135, + "avg_ts": 314.7, + "stddev_ts": 0.123666, + "samples_ns": [ + 406920143, + 406627640, + 406662066 + ], + "samples_ts": [ + 314.558, + 314.784, + 314.758 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T00:12:14Z", + "avg_ns": 3174619409, + "stddev_ns": 46593, + "avg_ts": 40.319794, + "stddev_ts": 0.000592, + "samples_ns": [ + 3174610193, + 3174578112, + 3174669922 + ], + "samples_ts": [ + 40.3199, + 40.3203, + 40.3192 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 388 + }, + { + "timestamp_utc": "2025-12-09T00:13:05.199097+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:12:24Z\",\n \"avg_ns\": 408950185,\n \"stddev_ns\": 433017,\n \"avg_ts\": 312.996790,\n \"stddev_ts\": 0.330905,\n \"samples_ns\": [ 408603027, 408812789, 409434740 ],\n \"samples_ts\": [ 313.262, 313.102, 312.626 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:12:26Z\",\n \"avg_ns\": 12851774343,\n \"stddev_ns\": 3039679,\n \"avg_ts\": 39.838858,\n \"stddev_ts\": 0.009409,\n \"samples_ns\": [ 12849045410, 12851233912, 12855043709 ],\n \"samples_ts\": [ 39.8473, 39.8405, 39.8287 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:12:24Z", + "avg_ns": 408950185, + "stddev_ns": 433017, + "avg_ts": 312.99679, + "stddev_ts": 0.330905, + "samples_ns": [ + 408603027, + 408812789, + 409434740 + ], + "samples_ts": [ + 313.262, + 313.102, + 312.626 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T00:12:26Z", + "avg_ns": 12851774343, + "stddev_ns": 3039679, + "avg_ts": 39.838858, + "stddev_ts": 0.009409, + "samples_ns": [ + 12849045410, + 12851233912, + 12855043709 + ], + "samples_ts": [ + 39.8473, + 39.8405, + 39.8287 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 389 + }, + { + "timestamp_utc": "2025-12-09T00:13:22.457963+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:13:06Z\",\n \"avg_ns\": 1731407166,\n \"stddev_ns\": 643921,\n \"avg_ts\": 295.713254,\n \"stddev_ts\": 0.109525,\n \"samples_ns\": [ 1732001168, 1731492976, 1730727356 ],\n \"samples_ts\": [ 295.612, 295.699, 295.829 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:13:12Z\",\n \"avg_ns\": 3138398191,\n \"stddev_ns\": 582251,\n \"avg_ts\": 40.785138,\n \"stddev_ts\": 0.007496,\n \"samples_ns\": [ 3138395870, 3138976185, 3137822520 ],\n \"samples_ts\": [ 40.7852, 40.7776, 40.7926 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:13:06Z", + "avg_ns": 1731407166, + "stddev_ns": 643921, + "avg_ts": 295.713254, + "stddev_ts": 0.109525, + "samples_ns": [ + 1732001168, + 1731492976, + 1730727356 + ], + "samples_ts": [ + 295.612, + 295.699, + 295.829 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T00:13:12Z", + "avg_ns": 3138398191, + "stddev_ns": 582251, + "avg_ts": 40.785138, + "stddev_ts": 0.007496, + "samples_ns": [ + 3138395870, + 3138976185, + 3137822520 + ], + "samples_ts": [ + 40.7852, + 40.7776, + 40.7926 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 390 + }, + { + "timestamp_utc": "2025-12-09T00:14:09.072489+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:13:23Z\",\n \"avg_ns\": 1731476305,\n \"stddev_ns\": 187874,\n \"avg_ts\": 295.701421,\n \"stddev_ts\": 0.030472,\n \"samples_ns\": [ 1731587296, 1731270495, 1731571126 ],\n \"samples_ts\": [ 295.682, 295.737, 295.685 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:13:30Z\",\n \"avg_ns\": 12929457442,\n \"stddev_ns\": 7160967,\n \"avg_ts\": 39.599504,\n \"stddev_ts\": 0.021922,\n \"samples_ns\": [ 12925249398, 12925398203, 12937724726 ],\n \"samples_ts\": [ 39.6124, 39.6119, 39.5742 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:13:23Z", + "avg_ns": 1731476305, + "stddev_ns": 187874, + "avg_ts": 295.701421, + "stddev_ts": 0.030472, + "samples_ns": [ + 1731587296, + 1731270495, + 1731571126 + ], + "samples_ts": [ + 295.682, + 295.737, + 295.685 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T00:13:30Z", + "avg_ns": 12929457442, + "stddev_ns": 7160967, + "avg_ts": 39.599504, + "stddev_ts": 0.021922, + "samples_ns": [ + 12925249398, + 12925398203, + 12937724726 + ], + "samples_ts": [ + 39.6124, + 39.6119, + 39.5742 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 391 + }, + { + "timestamp_utc": "2025-12-09T00:14:20.955449+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:14:09Z\",\n \"avg_ns\": 406371025,\n \"stddev_ns\": 538036,\n \"avg_ts\": 314.983455,\n \"stddev_ts\": 0.416753,\n \"samples_ns\": [ 405831162, 406906451, 406375463 ],\n \"samples_ts\": [ 315.402, 314.569, 314.98 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:14:11Z\",\n \"avg_ns\": 3122084052,\n \"stddev_ns\": 1274640,\n \"avg_ts\": 40.998260,\n \"stddev_ts\": 0.016737,\n \"samples_ns\": [ 3123447089, 3121883476, 3120921591 ],\n \"samples_ts\": [ 40.9804, 41.0009, 41.0135 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:14:09Z", + "avg_ns": 406371025, + "stddev_ns": 538036, + "avg_ts": 314.983455, + "stddev_ts": 0.416753, + "samples_ns": [ + 405831162, + 406906451, + 406375463 + ], + "samples_ts": [ + 315.402, + 314.569, + 314.98 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T00:14:11Z", + "avg_ns": 3122084052, + "stddev_ns": 1274640, + "avg_ts": 40.99826, + "stddev_ts": 0.016737, + "samples_ns": [ + 3123447089, + 3121883476, + 3120921591 + ], + "samples_ts": [ + 40.9804, + 41.0009, + 41.0135 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 392 + }, + { + "timestamp_utc": "2025-12-09T00:15:01.751143+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:14:21Z\",\n \"avg_ns\": 406178015,\n \"stddev_ns\": 257908,\n \"avg_ts\": 315.132847,\n \"stddev_ts\": 0.200025,\n \"samples_ns\": [ 406035847, 406022476, 406475722 ],\n \"samples_ts\": [ 315.243, 315.253, 314.902 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:14:23Z\",\n \"avg_ns\": 12757378521,\n \"stddev_ns\": 8600849,\n \"avg_ts\": 40.133649,\n \"stddev_ts\": 0.027062,\n \"samples_ns\": [ 12748094280, 12758966666, 12765074617 ],\n \"samples_ts\": [ 40.1629, 40.1286, 40.1094 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:14:21Z", + "avg_ns": 406178015, + "stddev_ns": 257908, + "avg_ts": 315.132847, + "stddev_ts": 0.200025, + "samples_ns": [ + 406035847, + 406022476, + 406475722 + ], + "samples_ts": [ + 315.243, + 315.253, + 314.902 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T00:14:23Z", + "avg_ns": 12757378521, + "stddev_ns": 8600849, + "avg_ts": 40.133649, + "stddev_ts": 0.027062, + "samples_ns": [ + 12748094280, + 12758966666, + 12765074617 + ], + "samples_ts": [ + 40.1629, + 40.1286, + 40.1094 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 393 + }, + { + "timestamp_utc": "2025-12-09T00:15:19.445237+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:15:02Z\",\n \"avg_ns\": 1831907473,\n \"stddev_ns\": 1969321,\n \"avg_ts\": 279.490313,\n \"stddev_ts\": 0.300201,\n \"samples_ns\": [ 1834073225, 1831421845, 1830227351 ],\n \"samples_ts\": [ 279.16, 279.564, 279.747 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:15:09Z\",\n \"avg_ns\": 3156531698,\n \"stddev_ns\": 2055951,\n \"avg_ts\": 40.550848,\n \"stddev_ts\": 0.026390,\n \"samples_ns\": [ 3156336504, 3158676745, 3154581847 ],\n \"samples_ts\": [ 40.5533, 40.5233, 40.5759 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:15:02Z", + "avg_ns": 1831907473, + "stddev_ns": 1969321, + "avg_ts": 279.490313, + "stddev_ts": 0.300201, + "samples_ns": [ + 1834073225, + 1831421845, + 1830227351 + ], + "samples_ts": [ + 279.16, + 279.564, + 279.747 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T00:15:09Z", + "avg_ns": 3156531698, + "stddev_ns": 2055951, + "avg_ts": 40.550848, + "stddev_ts": 0.02639, + "samples_ns": [ + 3156336504, + 3158676745, + 3154581847 + ], + "samples_ts": [ + 40.5533, + 40.5233, + 40.5759 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 394 + }, + { + "timestamp_utc": "2025-12-09T00:16:05.936316+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:15:20Z\",\n \"avg_ns\": 1826788205,\n \"stddev_ns\": 1816849,\n \"avg_ts\": 280.273507,\n \"stddev_ts\": 0.278751,\n \"samples_ns\": [ 1826797120, 1828600581, 1824966914 ],\n \"samples_ts\": [ 280.272, 279.996, 280.553 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:15:27Z\",\n \"avg_ns\": 12758557499,\n \"stddev_ns\": 3612667,\n \"avg_ts\": 40.129931,\n \"stddev_ts\": 0.011356,\n \"samples_ns\": [ 12762693143, 12756948964, 12756030391 ],\n \"samples_ts\": [ 40.1169, 40.135, 40.1379 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:15:20Z", + "avg_ns": 1826788205, + "stddev_ns": 1816849, + "avg_ts": 280.273507, + "stddev_ts": 0.278751, + "samples_ns": [ + 1826797120, + 1828600581, + 1824966914 + ], + "samples_ts": [ + 280.272, + 279.996, + 280.553 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T00:15:27Z", + "avg_ns": 12758557499, + "stddev_ns": 3612667, + "avg_ts": 40.129931, + "stddev_ts": 0.011356, + "samples_ns": [ + 12762693143, + 12756948964, + 12756030391 + ], + "samples_ts": [ + 40.1169, + 40.135, + 40.1379 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 395 + }, + { + "timestamp_utc": "2025-12-09T00:16:18.915335+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:16:06Z\",\n \"avg_ns\": 325874926,\n \"stddev_ns\": 456170,\n \"avg_ts\": 392.789246,\n \"stddev_ts\": 0.549064,\n \"samples_ns\": [ 325909378, 325403224, 326312178 ],\n \"samples_ts\": [ 392.747, 393.358, 392.262 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:16:08Z\",\n \"avg_ns\": 3591041539,\n \"stddev_ns\": 8076362,\n \"avg_ts\": 35.644375,\n \"stddev_ts\": 0.080095,\n \"samples_ns\": [ 3584234958, 3599965092, 3588924569 ],\n \"samples_ts\": [ 35.7119, 35.5559, 35.6653 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:16:06Z", + "avg_ns": 325874926, + "stddev_ns": 456170, + "avg_ts": 392.789246, + "stddev_ts": 0.549064, + "samples_ns": [ + 325909378, + 325403224, + 326312178 + ], + "samples_ts": [ + 392.747, + 393.358, + 392.262 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T00:16:08Z", + "avg_ns": 3591041539, + "stddev_ns": 8076362, + "avg_ts": 35.644375, + "stddev_ts": 0.080095, + "samples_ns": [ + 3584234958, + 3599965092, + 3588924569 + ], + "samples_ts": [ + 35.7119, + 35.5559, + 35.6653 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 396 + }, + { + "timestamp_utc": "2025-12-09T00:17:05.333227+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:16:19Z\",\n \"avg_ns\": 323689298,\n \"stddev_ns\": 3090734,\n \"avg_ts\": 395.464844,\n \"stddev_ts\": 3.755370,\n \"samples_ns\": [ 327257056, 321830299, 321980540 ],\n \"samples_ts\": [ 391.13, 397.725, 397.54 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:16:20Z\",\n \"avg_ns\": 14742751496,\n \"stddev_ns\": 20101370,\n \"avg_ts\": 34.728974,\n \"stddev_ts\": 0.047359,\n \"samples_ns\": [ 14744140644, 14721991584, 14762122260 ],\n \"samples_ts\": [ 34.7257, 34.7779, 34.6834 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:16:19Z", + "avg_ns": 323689298, + "stddev_ns": 3090734, + "avg_ts": 395.464844, + "stddev_ts": 3.75537, + "samples_ns": [ + 327257056, + 321830299, + 321980540 + ], + "samples_ts": [ + 391.13, + 397.725, + 397.54 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T00:16:20Z", + "avg_ns": 14742751496, + "stddev_ns": 20101370, + "avg_ts": 34.728974, + "stddev_ts": 0.047359, + "samples_ns": [ + 14744140644, + 14721991584, + 14762122260 + ], + "samples_ts": [ + 34.7257, + 34.7779, + 34.6834 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 397 + }, + { + "timestamp_utc": "2025-12-09T00:17:22.453083+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:17:06Z\",\n \"avg_ns\": 1342820971,\n \"stddev_ns\": 3502399,\n \"avg_ts\": 381.288593,\n \"stddev_ts\": 0.992978,\n \"samples_ns\": [ 1340408664, 1346837765, 1341216486 ],\n \"samples_ts\": [ 381.973, 380.15, 381.743 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:17:11Z\",\n \"avg_ns\": 3608504910,\n \"stddev_ns\": 7210367,\n \"avg_ts\": 35.471849,\n \"stddev_ts\": 0.070796,\n \"samples_ns\": [ 3605110696, 3616785456, 3603618580 ],\n \"samples_ts\": [ 35.5052, 35.3905, 35.5199 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:17:06Z", + "avg_ns": 1342820971, + "stddev_ns": 3502399, + "avg_ts": 381.288593, + "stddev_ts": 0.992978, + "samples_ns": [ + 1340408664, + 1346837765, + 1341216486 + ], + "samples_ts": [ + 381.973, + 380.15, + 381.743 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T00:17:11Z", + "avg_ns": 3608504910, + "stddev_ns": 7210367, + "avg_ts": 35.471849, + "stddev_ts": 0.070796, + "samples_ns": [ + 3605110696, + 3616785456, + 3603618580 + ], + "samples_ts": [ + 35.5052, + 35.3905, + 35.5199 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 398 + }, + { + "timestamp_utc": "2025-12-09T00:18:13.473834+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:17:23Z\",\n \"avg_ns\": 1346247325,\n \"stddev_ns\": 3632242,\n \"avg_ts\": 380.318291,\n \"stddev_ts\": 1.025117,\n \"samples_ns\": [ 1345417355, 1350222347, 1343102275 ],\n \"samples_ts\": [ 380.551, 379.197, 381.207 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:17:28Z\",\n \"avg_ns\": 14912003986,\n \"stddev_ns\": 5321923,\n \"avg_ts\": 34.334758,\n \"stddev_ts\": 0.012245,\n \"samples_ns\": [ 14909263171, 14918134541, 14908614248 ],\n \"samples_ts\": [ 34.3411, 34.3206, 34.3426 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:17:23Z", + "avg_ns": 1346247325, + "stddev_ns": 3632242, + "avg_ts": 380.318291, + "stddev_ts": 1.025117, + "samples_ns": [ + 1345417355, + 1350222347, + 1343102275 + ], + "samples_ts": [ + 380.551, + 379.197, + 381.207 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T00:17:28Z", + "avg_ns": 14912003986, + "stddev_ns": 5321923, + "avg_ts": 34.334758, + "stddev_ts": 0.012245, + "samples_ns": [ + 14909263171, + 14918134541, + 14908614248 + ], + "samples_ts": [ + 34.3411, + 34.3206, + 34.3426 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 399 + }, + { + "timestamp_utc": "2025-12-09T00:18:26.444678+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:18:14Z\",\n \"avg_ns\": 323802311,\n \"stddev_ns\": 314311,\n \"avg_ts\": 395.303170,\n \"stddev_ts\": 0.383113,\n \"samples_ns\": [ 323816398, 324108827, 323481709 ],\n \"samples_ts\": [ 395.286, 394.929, 395.695 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:18:15Z\",\n \"avg_ns\": 3579111755,\n \"stddev_ns\": 1760417,\n \"avg_ts\": 35.763069,\n \"stddev_ts\": 0.017591,\n \"samples_ns\": [ 3579146864, 3577334046, 3580854355 ],\n \"samples_ts\": [ 35.7627, 35.7808, 35.7457 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:18:14Z", + "avg_ns": 323802311, + "stddev_ns": 314311, + "avg_ts": 395.30317, + "stddev_ts": 0.383113, + "samples_ns": [ + 323816398, + 324108827, + 323481709 + ], + "samples_ts": [ + 395.286, + 394.929, + 395.695 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T00:18:15Z", + "avg_ns": 3579111755, + "stddev_ns": 1760417, + "avg_ts": 35.763069, + "stddev_ts": 0.017591, + "samples_ns": [ + 3579146864, + 3577334046, + 3580854355 + ], + "samples_ts": [ + 35.7627, + 35.7808, + 35.7457 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 400 + }, + { + "timestamp_utc": "2025-12-09T00:19:13.134775+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:18:27Z\",\n \"avg_ns\": 329588201,\n \"stddev_ns\": 535306,\n \"avg_ts\": 388.364098,\n \"stddev_ts\": 0.630196,\n \"samples_ns\": [ 329641678, 330094145, 329028782 ],\n \"samples_ts\": [ 388.3, 387.768, 389.024 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:18:28Z\",\n \"avg_ns\": 14822870674,\n \"stddev_ns\": 15547813,\n \"avg_ts\": 34.541243,\n \"stddev_ts\": 0.036208,\n \"samples_ns\": [ 14813862689, 14840823155, 14813926179 ],\n \"samples_ts\": [ 34.5622, 34.4994, 34.5621 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:18:27Z", + "avg_ns": 329588201, + "stddev_ns": 535306, + "avg_ts": 388.364098, + "stddev_ts": 0.630196, + "samples_ns": [ + 329641678, + 330094145, + 329028782 + ], + "samples_ts": [ + 388.3, + 387.768, + 389.024 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T00:18:28Z", + "avg_ns": 14822870674, + "stddev_ns": 15547813, + "avg_ts": 34.541243, + "stddev_ts": 0.036208, + "samples_ns": [ + 14813862689, + 14840823155, + 14813926179 + ], + "samples_ts": [ + 34.5622, + 34.4994, + 34.5621 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 401 + }, + { + "timestamp_utc": "2025-12-09T00:19:30.512451+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:19:13Z\",\n \"avg_ns\": 1401171643,\n \"stddev_ns\": 2109501,\n \"avg_ts\": 365.409032,\n \"stddev_ts\": 0.550427,\n \"samples_ns\": [ 1398742805, 1402235228, 1402536898 ],\n \"samples_ts\": [ 366.043, 365.131, 365.053 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:19:19Z\",\n \"avg_ns\": 3618155475,\n \"stddev_ns\": 8426346,\n \"avg_ts\": 35.377270,\n \"stddev_ts\": 0.082280,\n \"samples_ns\": [ 3612708079, 3613897476, 3627860871 ],\n \"samples_ts\": [ 35.4305, 35.4188, 35.2825 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:19:13Z", + "avg_ns": 1401171643, + "stddev_ns": 2109501, + "avg_ts": 365.409032, + "stddev_ts": 0.550427, + "samples_ns": [ + 1398742805, + 1402235228, + 1402536898 + ], + "samples_ts": [ + 366.043, + 365.131, + 365.053 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T00:19:19Z", + "avg_ns": 3618155475, + "stddev_ns": 8426346, + "avg_ts": 35.37727, + "stddev_ts": 0.08228, + "samples_ns": [ + 3612708079, + 3613897476, + 3627860871 + ], + "samples_ts": [ + 35.4305, + 35.4188, + 35.2825 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 402 + }, + { + "timestamp_utc": "2025-12-09T00:20:20.560802+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:19:31Z\",\n \"avg_ns\": 1362840533,\n \"stddev_ns\": 1420602,\n \"avg_ts\": 375.686192,\n \"stddev_ts\": 0.391498,\n \"samples_ns\": [ 1362888183, 1364236232, 1361397185 ],\n \"samples_ts\": [ 375.673, 375.302, 376.084 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:19:36Z\",\n \"avg_ns\": 14567335517,\n \"stddev_ns\": 24594108,\n \"avg_ts\": 35.147194,\n \"stddev_ts\": 0.059393,\n \"samples_ns\": [ 14539117721, 14578674469, 14584214362 ],\n \"samples_ts\": [ 35.2153, 35.1198, 35.1065 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:19:31Z", + "avg_ns": 1362840533, + "stddev_ns": 1420602, + "avg_ts": 375.686192, + "stddev_ts": 0.391498, + "samples_ns": [ + 1362888183, + 1364236232, + 1361397185 + ], + "samples_ts": [ + 375.673, + 375.302, + 376.084 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T00:19:36Z", + "avg_ns": 14567335517, + "stddev_ns": 24594108, + "avg_ts": 35.147194, + "stddev_ts": 0.059393, + "samples_ns": [ + 14539117721, + 14578674469, + 14584214362 + ], + "samples_ts": [ + 35.2153, + 35.1198, + 35.1065 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 403 + }, + { + "timestamp_utc": "2025-12-09T00:20:33.530965+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:20:21Z\",\n \"avg_ns\": 325282297,\n \"stddev_ns\": 259409,\n \"avg_ts\": 393.504520,\n \"stddev_ts\": 0.313777,\n \"samples_ns\": [ 325254605, 325554442, 325037844 ],\n \"samples_ts\": [ 393.538, 393.175, 393.8 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:20:22Z\",\n \"avg_ns\": 3588297542,\n \"stddev_ns\": 7956519,\n \"avg_ts\": 35.671629,\n \"stddev_ts\": 0.078996,\n \"samples_ns\": [ 3583469565, 3597480886, 3583942175 ],\n \"samples_ts\": [ 35.7196, 35.5805, 35.7149 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:20:21Z", + "avg_ns": 325282297, + "stddev_ns": 259409, + "avg_ts": 393.50452, + "stddev_ts": 0.313777, + "samples_ns": [ + 325254605, + 325554442, + 325037844 + ], + "samples_ts": [ + 393.538, + 393.175, + 393.8 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T00:20:22Z", + "avg_ns": 3588297542, + "stddev_ns": 7956519, + "avg_ts": 35.671629, + "stddev_ts": 0.078996, + "samples_ns": [ + 3583469565, + 3597480886, + 3583942175 + ], + "samples_ts": [ + 35.7196, + 35.5805, + 35.7149 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 404 + }, + { + "timestamp_utc": "2025-12-09T00:21:19.362099+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:20:34Z\",\n \"avg_ns\": 322742771,\n \"stddev_ns\": 660690,\n \"avg_ts\": 396.601781,\n \"stddev_ts\": 0.812132,\n \"samples_ns\": [ 322807692, 323368604, 322052017 ],\n \"samples_ts\": [ 396.521, 395.833, 397.451 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:20:35Z\",\n \"avg_ns\": 14548721320,\n \"stddev_ns\": 8968225,\n \"avg_ts\": 35.192105,\n \"stddev_ts\": 0.021686,\n \"samples_ns\": [ 14558710676, 14546087596, 14541365689 ],\n \"samples_ts\": [ 35.1679, 35.1985, 35.2099 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:20:34Z", + "avg_ns": 322742771, + "stddev_ns": 660690, + "avg_ts": 396.601781, + "stddev_ts": 0.812132, + "samples_ns": [ + 322807692, + 323368604, + 322052017 + ], + "samples_ts": [ + 396.521, + 395.833, + 397.451 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T00:20:35Z", + "avg_ns": 14548721320, + "stddev_ns": 8968225, + "avg_ts": 35.192105, + "stddev_ts": 0.021686, + "samples_ns": [ + 14558710676, + 14546087596, + 14541365689 + ], + "samples_ts": [ + 35.1679, + 35.1985, + 35.2099 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 405 + }, + { + "timestamp_utc": "2025-12-09T00:21:37.223410+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:21:20Z\",\n \"avg_ns\": 1535351070,\n \"stddev_ns\": 3929197,\n \"avg_ts\": 333.475676,\n \"stddev_ts\": 0.853882,\n \"samples_ns\": [ 1531127005, 1536030108, 1538896099 ],\n \"samples_ts\": [ 334.394, 333.327, 332.706 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:21:26Z\",\n \"avg_ns\": 3598928653,\n \"stddev_ns\": 4323609,\n \"avg_ts\": 35.566174,\n \"stddev_ts\": 0.042697,\n \"samples_ns\": [ 3603790736, 3595519595, 3597475630 ],\n \"samples_ts\": [ 35.5182, 35.5999, 35.5805 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:21:20Z", + "avg_ns": 1535351070, + "stddev_ns": 3929197, + "avg_ts": 333.475676, + "stddev_ts": 0.853882, + "samples_ns": [ + 1531127005, + 1536030108, + 1538896099 + ], + "samples_ts": [ + 334.394, + 333.327, + 332.706 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T00:21:26Z", + "avg_ns": 3598928653, + "stddev_ns": 4323609, + "avg_ts": 35.566174, + "stddev_ts": 0.042697, + "samples_ns": [ + 3603790736, + 3595519595, + 3597475630 + ], + "samples_ts": [ + 35.5182, + 35.5999, + 35.5805 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 406 + }, + { + "timestamp_utc": "2025-12-09T00:22:28.424474+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:21:38Z\",\n \"avg_ns\": 1589283507,\n \"stddev_ns\": 1179100,\n \"avg_ts\": 322.157869,\n \"stddev_ts\": 0.238695,\n \"samples_ns\": [ 1590547181, 1588216417, 1589086925 ],\n \"samples_ts\": [ 321.902, 322.374, 322.198 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:21:44Z\",\n \"avg_ns\": 14645973490,\n \"stddev_ns\": 19797848,\n \"avg_ts\": 34.958456,\n \"stddev_ts\": 0.047265,\n \"samples_ns\": [ 14625203441, 14648088451, 14664628579 ],\n \"samples_ts\": [ 35.0081, 34.9534, 34.9139 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:21:38Z", + "avg_ns": 1589283507, + "stddev_ns": 1179100, + "avg_ts": 322.157869, + "stddev_ts": 0.238695, + "samples_ns": [ + 1590547181, + 1588216417, + 1589086925 + ], + "samples_ts": [ + 321.902, + 322.374, + 322.198 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T00:21:44Z", + "avg_ns": 14645973490, + "stddev_ns": 19797848, + "avg_ts": 34.958456, + "stddev_ts": 0.047265, + "samples_ns": [ + 14625203441, + 14648088451, + 14664628579 + ], + "samples_ts": [ + 35.0081, + 34.9534, + 34.9139 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 407 + }, + { + "timestamp_utc": "2025-12-09T00:22:41.383799+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:22:29Z\",\n \"avg_ns\": 322894726,\n \"stddev_ns\": 1875411,\n \"avg_ts\": 396.422915,\n \"stddev_ts\": 2.294651,\n \"samples_ns\": [ 321905428, 325057455, 321721297 ],\n \"samples_ts\": [ 397.632, 393.777, 397.86 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:22:30Z\",\n \"avg_ns\": 3589214919,\n \"stddev_ns\": 2874965,\n \"avg_ts\": 35.662410,\n \"stddev_ts\": 0.028553,\n \"samples_ns\": [ 3592083034, 3589226093, 3586335632 ],\n \"samples_ts\": [ 35.6339, 35.6623, 35.691 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:22:29Z", + "avg_ns": 322894726, + "stddev_ns": 1875411, + "avg_ts": 396.422915, + "stddev_ts": 2.294651, + "samples_ns": [ + 321905428, + 325057455, + 321721297 + ], + "samples_ts": [ + 397.632, + 393.777, + 397.86 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T00:22:30Z", + "avg_ns": 3589214919, + "stddev_ns": 2874965, + "avg_ts": 35.66241, + "stddev_ts": 0.028553, + "samples_ns": [ + 3592083034, + 3589226093, + 3586335632 + ], + "samples_ts": [ + 35.6339, + 35.6623, + 35.691 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 408 + }, + { + "timestamp_utc": "2025-12-09T00:23:27.311596+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:22:42Z\",\n \"avg_ns\": 322701406,\n \"stddev_ns\": 724023,\n \"avg_ts\": 396.652838,\n \"stddev_ts\": 0.888969,\n \"samples_ns\": [ 322036585, 323472080, 322595555 ],\n \"samples_ts\": [ 397.47, 395.706, 396.782 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:22:43Z\",\n \"avg_ns\": 14578874845,\n \"stddev_ns\": 13058101,\n \"avg_ts\": 35.119327,\n \"stddev_ts\": 0.031438,\n \"samples_ns\": [ 14572756442, 14593867560, 14570000535 ],\n \"samples_ts\": [ 35.1341, 35.0832, 35.1407 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:22:42Z", + "avg_ns": 322701406, + "stddev_ns": 724023, + "avg_ts": 396.652838, + "stddev_ts": 0.888969, + "samples_ns": [ + 322036585, + 323472080, + 322595555 + ], + "samples_ts": [ + 397.47, + 395.706, + 396.782 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T00:22:43Z", + "avg_ns": 14578874845, + "stddev_ns": 13058101, + "avg_ts": 35.119327, + "stddev_ts": 0.031438, + "samples_ns": [ + 14572756442, + 14593867560, + 14570000535 + ], + "samples_ts": [ + 35.1341, + 35.0832, + 35.1407 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 409 + }, + { + "timestamp_utc": "2025-12-09T00:23:44.208278+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:23:28Z\",\n \"avg_ns\": 1309177171,\n \"stddev_ns\": 1934613,\n \"avg_ts\": 391.085910,\n \"stddev_ts\": 0.577429,\n \"samples_ns\": [ 1308009972, 1308111236, 1311410305 ],\n \"samples_ts\": [ 391.434, 391.404, 390.419 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:23:33Z\",\n \"avg_ns\": 3581887136,\n \"stddev_ns\": 5069448,\n \"avg_ts\": 35.735400,\n \"stddev_ts\": 0.050528,\n \"samples_ns\": [ 3578896228, 3579025643, 3587739539 ],\n \"samples_ts\": [ 35.7652, 35.7639, 35.6771 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:23:28Z", + "avg_ns": 1309177171, + "stddev_ns": 1934613, + "avg_ts": 391.08591, + "stddev_ts": 0.577429, + "samples_ns": [ + 1308009972, + 1308111236, + 1311410305 + ], + "samples_ts": [ + 391.434, + 391.404, + 390.419 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T00:23:33Z", + "avg_ns": 3581887136, + "stddev_ns": 5069448, + "avg_ts": 35.7354, + "stddev_ts": 0.050528, + "samples_ns": [ + 3578896228, + 3579025643, + 3587739539 + ], + "samples_ts": [ + 35.7652, + 35.7639, + 35.6771 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 410 + }, + { + "timestamp_utc": "2025-12-09T00:24:34.337910+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:23:45Z\",\n \"avg_ns\": 1343360561,\n \"stddev_ns\": 648049,\n \"avg_ts\": 381.133773,\n \"stddev_ts\": 0.183814,\n \"samples_ns\": [ 1343063950, 1344103833, 1342913900 ],\n \"samples_ts\": [ 381.218, 380.923, 381.26 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:23:50Z\",\n \"avg_ns\": 14612327250,\n \"stddev_ns\": 7775075,\n \"avg_ts\": 35.038915,\n \"stddev_ts\": 0.018641,\n \"samples_ns\": [ 14619663380, 14604180516, 14613137856 ],\n \"samples_ts\": [ 35.0213, 35.0585, 35.037 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:23:45Z", + "avg_ns": 1343360561, + "stddev_ns": 648049, + "avg_ts": 381.133773, + "stddev_ts": 0.183814, + "samples_ns": [ + 1343063950, + 1344103833, + 1342913900 + ], + "samples_ts": [ + 381.218, + 380.923, + 381.26 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T00:23:50Z", + "avg_ns": 14612327250, + "stddev_ns": 7775075, + "avg_ts": 35.038915, + "stddev_ts": 0.018641, + "samples_ns": [ + 14619663380, + 14604180516, + 14613137856 + ], + "samples_ts": [ + 35.0213, + 35.0585, + 35.037 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 411 + }, + { + "timestamp_utc": "2025-12-09T00:24:47.326565+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:24:35Z\",\n \"avg_ns\": 321554095,\n \"stddev_ns\": 511701,\n \"avg_ts\": 398.067442,\n \"stddev_ts\": 0.632929,\n \"samples_ns\": [ 321190147, 321332963, 322139175 ],\n \"samples_ts\": [ 398.518, 398.341, 397.344 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:24:36Z\",\n \"avg_ns\": 3598332523,\n \"stddev_ns\": 9078480,\n \"avg_ts\": 35.572183,\n \"stddev_ts\": 0.089662,\n \"samples_ns\": [ 3590712626, 3595908045, 3608376899 ],\n \"samples_ts\": [ 35.6475, 35.596, 35.473 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:24:35Z", + "avg_ns": 321554095, + "stddev_ns": 511701, + "avg_ts": 398.067442, + "stddev_ts": 0.632929, + "samples_ns": [ + 321190147, + 321332963, + 322139175 + ], + "samples_ts": [ + 398.518, + 398.341, + 397.344 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T00:24:36Z", + "avg_ns": 3598332523, + "stddev_ns": 9078480, + "avg_ts": 35.572183, + "stddev_ts": 0.089662, + "samples_ns": [ + 3590712626, + 3595908045, + 3608376899 + ], + "samples_ts": [ + 35.6475, + 35.596, + 35.473 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 412 + }, + { + "timestamp_utc": "2025-12-09T00:25:33.183810+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:24:48Z\",\n \"avg_ns\": 323100442,\n \"stddev_ns\": 1671314,\n \"avg_ts\": 396.168686,\n \"stddev_ts\": 2.044565,\n \"samples_ns\": [ 321754474, 322575962, 324970892 ],\n \"samples_ts\": [ 397.819, 396.806, 393.881 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:24:49Z\",\n \"avg_ns\": 14553435315,\n \"stddev_ns\": 21298867,\n \"avg_ts\": 35.180747,\n \"stddev_ts\": 0.051516,\n \"samples_ns\": [ 14529703251, 14559713948, 14570888747 ],\n \"samples_ts\": [ 35.2382, 35.1655, 35.1386 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:24:48Z", + "avg_ns": 323100442, + "stddev_ns": 1671314, + "avg_ts": 396.168686, + "stddev_ts": 2.044565, + "samples_ns": [ + 321754474, + 322575962, + 324970892 + ], + "samples_ts": [ + 397.819, + 396.806, + 393.881 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T00:24:49Z", + "avg_ns": 14553435315, + "stddev_ns": 21298867, + "avg_ts": 35.180747, + "stddev_ts": 0.051516, + "samples_ns": [ + 14529703251, + 14559713948, + 14570888747 + ], + "samples_ts": [ + 35.2382, + 35.1655, + 35.1386 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 413 + }, + { + "timestamp_utc": "2025-12-09T00:25:50.324505+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:25:34Z\",\n \"avg_ns\": 1363328249,\n \"stddev_ns\": 1043603,\n \"avg_ts\": 375.551669,\n \"stddev_ts\": 0.287423,\n \"samples_ns\": [ 1362126569, 1363997774, 1363860405 ],\n \"samples_ts\": [ 375.883, 375.367, 375.405 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:25:39Z\",\n \"avg_ns\": 3578813439,\n \"stddev_ns\": 2125657,\n \"avg_ts\": 35.766053,\n \"stddev_ts\": 0.021220,\n \"samples_ns\": [ 3577864221, 3581246492, 3577329606 ],\n \"samples_ts\": [ 35.7755, 35.7417, 35.7809 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:25:34Z", + "avg_ns": 1363328249, + "stddev_ns": 1043603, + "avg_ts": 375.551669, + "stddev_ts": 0.287423, + "samples_ns": [ + 1362126569, + 1363997774, + 1363860405 + ], + "samples_ts": [ + 375.883, + 375.367, + 375.405 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T00:25:39Z", + "avg_ns": 3578813439, + "stddev_ns": 2125657, + "avg_ts": 35.766053, + "stddev_ts": 0.02122, + "samples_ns": [ + 3577864221, + 3581246492, + 3577329606 + ], + "samples_ts": [ + 35.7755, + 35.7417, + 35.7809 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 414 + }, + { + "timestamp_utc": "2025-12-09T00:26:40.183281+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:25:51Z\",\n \"avg_ns\": 1338768298,\n \"stddev_ns\": 2455948,\n \"avg_ts\": 382.441942,\n \"stddev_ts\": 0.701273,\n \"samples_ns\": [ 1336418874, 1338568544, 1341317478 ],\n \"samples_ts\": [ 383.113, 382.498, 381.714 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:25:56Z\",\n \"avg_ns\": 14533439195,\n \"stddev_ns\": 15400823,\n \"avg_ts\": 35.229128,\n \"stddev_ts\": 0.037319,\n \"samples_ns\": [ 14550310750, 14529871512, 14520135323 ],\n \"samples_ts\": [ 35.1883, 35.2378, 35.2614 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:25:51Z", + "avg_ns": 1338768298, + "stddev_ns": 2455948, + "avg_ts": 382.441942, + "stddev_ts": 0.701273, + "samples_ns": [ + 1336418874, + 1338568544, + 1341317478 + ], + "samples_ts": [ + 383.113, + 382.498, + 381.714 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T00:25:56Z", + "avg_ns": 14533439195, + "stddev_ns": 15400823, + "avg_ts": 35.229128, + "stddev_ts": 0.037319, + "samples_ns": [ + 14550310750, + 14529871512, + 14520135323 + ], + "samples_ts": [ + 35.1883, + 35.2378, + 35.2614 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 415 + }, + { + "timestamp_utc": "2025-12-09T00:26:53.199275+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:26:41Z\",\n \"avg_ns\": 325710292,\n \"stddev_ns\": 3924517,\n \"avg_ts\": 393.025052,\n \"stddev_ts\": 4.702930,\n \"samples_ns\": [ 323513803, 323375839, 330241234 ],\n \"samples_ts\": [ 395.655, 395.824, 387.595 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:26:42Z\",\n \"avg_ns\": 3589552824,\n \"stddev_ns\": 3984821,\n \"avg_ts\": 35.659067,\n \"stddev_ts\": 0.039583,\n \"samples_ns\": [ 3593340546, 3585398102, 3589919826 ],\n \"samples_ts\": [ 35.6214, 35.7004, 35.6554 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:26:41Z", + "avg_ns": 325710292, + "stddev_ns": 3924517, + "avg_ts": 393.025052, + "stddev_ts": 4.70293, + "samples_ns": [ + 323513803, + 323375839, + 330241234 + ], + "samples_ts": [ + 395.655, + 395.824, + 387.595 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T00:26:42Z", + "avg_ns": 3589552824, + "stddev_ns": 3984821, + "avg_ts": 35.659067, + "stddev_ts": 0.039583, + "samples_ns": [ + 3593340546, + 3585398102, + 3589919826 + ], + "samples_ts": [ + 35.6214, + 35.7004, + 35.6554 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 416 + }, + { + "timestamp_utc": "2025-12-09T00:27:38.984350+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:26:53Z\",\n \"avg_ns\": 322828494,\n \"stddev_ns\": 1405388,\n \"avg_ts\": 396.500358,\n \"stddev_ts\": 1.722035,\n \"samples_ns\": [ 322182834, 321861960, 324440688 ],\n \"samples_ts\": [ 397.29, 397.686, 394.525 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:26:55Z\",\n \"avg_ns\": 14530094028,\n \"stddev_ns\": 16055650,\n \"avg_ts\": 35.237240,\n \"stddev_ts\": 0.038960,\n \"samples_ns\": [ 14538517349, 14511580091, 14540184645 ],\n \"samples_ts\": [ 35.2168, 35.2822, 35.2128 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:26:53Z", + "avg_ns": 322828494, + "stddev_ns": 1405388, + "avg_ts": 396.500358, + "stddev_ts": 1.722035, + "samples_ns": [ + 322182834, + 321861960, + 324440688 + ], + "samples_ts": [ + 397.29, + 397.686, + 394.525 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T00:26:55Z", + "avg_ns": 14530094028, + "stddev_ns": 16055650, + "avg_ts": 35.23724, + "stddev_ts": 0.03896, + "samples_ns": [ + 14538517349, + 14511580091, + 14540184645 + ], + "samples_ts": [ + 35.2168, + 35.2822, + 35.2128 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 417 + }, + { + "timestamp_utc": "2025-12-09T00:27:56.577591+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:27:39Z\",\n \"avg_ns\": 1475038591,\n \"stddev_ns\": 1136672,\n \"avg_ts\": 347.109700,\n \"stddev_ts\": 0.267377,\n \"samples_ns\": [ 1474553059, 1474225318, 1476337396 ],\n \"samples_ts\": [ 347.224, 347.301, 346.804 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:27:45Z\",\n \"avg_ns\": 3594918936,\n \"stddev_ns\": 10317219,\n \"avg_ts\": 35.606005,\n \"stddev_ts\": 0.102140,\n \"samples_ns\": [ 3593874221, 3605718417, 3585164172 ],\n \"samples_ts\": [ 35.6162, 35.4992, 35.7027 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:27:39Z", + "avg_ns": 1475038591, + "stddev_ns": 1136672, + "avg_ts": 347.1097, + "stddev_ts": 0.267377, + "samples_ns": [ + 1474553059, + 1474225318, + 1476337396 + ], + "samples_ts": [ + 347.224, + 347.301, + 346.804 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T00:27:45Z", + "avg_ns": 3594918936, + "stddev_ns": 10317219, + "avg_ts": 35.606005, + "stddev_ts": 0.10214, + "samples_ns": [ + 3593874221, + 3605718417, + 3585164172 + ], + "samples_ts": [ + 35.6162, + 35.4992, + 35.7027 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 418 + }, + { + "timestamp_utc": "2025-12-09T00:28:47.247881+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:27:57Z\",\n \"avg_ns\": 1528117802,\n \"stddev_ns\": 872050,\n \"avg_ts\": 335.052776,\n \"stddev_ts\": 0.191066,\n \"samples_ns\": [ 1528767342, 1527127812, 1528458253 ],\n \"samples_ts\": [ 334.91, 335.27, 334.978 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:28:03Z\",\n \"avg_ns\": 14547583221,\n \"stddev_ns\": 2812858,\n \"avg_ts\": 35.194850,\n \"stddev_ts\": 0.006799,\n \"samples_ns\": [ 14548483506, 14544433127, 14549833031 ],\n \"samples_ts\": [ 35.1927, 35.2025, 35.1894 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:27:57Z", + "avg_ns": 1528117802, + "stddev_ns": 872050, + "avg_ts": 335.052776, + "stddev_ts": 0.191066, + "samples_ns": [ + 1528767342, + 1527127812, + 1528458253 + ], + "samples_ts": [ + 334.91, + 335.27, + 334.978 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T00:28:03Z", + "avg_ns": 14547583221, + "stddev_ns": 2812858, + "avg_ts": 35.19485, + "stddev_ts": 0.006799, + "samples_ns": [ + 14548483506, + 14544433127, + 14549833031 + ], + "samples_ts": [ + 35.1927, + 35.2025, + 35.1894 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 419 + }, + { + "timestamp_utc": "2025-12-09T00:29:00.225668+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:28:48Z\",\n \"avg_ns\": 322462310,\n \"stddev_ns\": 343973,\n \"avg_ts\": 396.945915,\n \"stddev_ts\": 0.422625,\n \"samples_ns\": [ 322205783, 322328581, 322852567 ],\n \"samples_ts\": [ 397.262, 397.11, 396.466 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:28:49Z\",\n \"avg_ns\": 3594080306,\n \"stddev_ns\": 1638939,\n \"avg_ts\": 35.614123,\n \"stddev_ts\": 0.016223,\n \"samples_ns\": [ 3594876220, 3592197821, 3595166879 ],\n \"samples_ts\": [ 35.6062, 35.6328, 35.6034 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:28:48Z", + "avg_ns": 322462310, + "stddev_ns": 343973, + "avg_ts": 396.945915, + "stddev_ts": 0.422625, + "samples_ns": [ + 322205783, + 322328581, + 322852567 + ], + "samples_ts": [ + 397.262, + 397.11, + 396.466 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T00:28:49Z", + "avg_ns": 3594080306, + "stddev_ns": 1638939, + "avg_ts": 35.614123, + "stddev_ts": 0.016223, + "samples_ns": [ + 3594876220, + 3592197821, + 3595166879 + ], + "samples_ts": [ + 35.6062, + 35.6328, + 35.6034 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 420 + }, + { + "timestamp_utc": "2025-12-09T00:29:45.981859+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:29:01Z\",\n \"avg_ns\": 322259424,\n \"stddev_ns\": 250899,\n \"avg_ts\": 397.195681,\n \"stddev_ts\": 0.308414,\n \"samples_ns\": [ 322022323, 322234918, 322521032 ],\n \"samples_ts\": [ 397.488, 397.226, 396.873 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:29:02Z\",\n \"avg_ns\": 14520495134,\n \"stddev_ns\": 18660751,\n \"avg_ts\": 35.260544,\n \"stddev_ts\": 0.045310,\n \"samples_ns\": [ 14539537507, 14502241814, 14519706082 ],\n \"samples_ts\": [ 35.2143, 35.3049, 35.2624 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:29:01Z", + "avg_ns": 322259424, + "stddev_ns": 250899, + "avg_ts": 397.195681, + "stddev_ts": 0.308414, + "samples_ns": [ + 322022323, + 322234918, + 322521032 + ], + "samples_ts": [ + 397.488, + 397.226, + 396.873 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T00:29:02Z", + "avg_ns": 14520495134, + "stddev_ns": 18660751, + "avg_ts": 35.260544, + "stddev_ts": 0.04531, + "samples_ns": [ + 14539537507, + 14502241814, + 14519706082 + ], + "samples_ts": [ + 35.2143, + 35.3049, + 35.2624 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 421 + }, + { + "timestamp_utc": "2025-12-09T00:30:02.913683+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:29:46Z\",\n \"avg_ns\": 1318911537,\n \"stddev_ns\": 2766434,\n \"avg_ts\": 388.200032,\n \"stddev_ts\": 0.813645,\n \"samples_ns\": [ 1316504835, 1318296186, 1321933591 ],\n \"samples_ts\": [ 388.909, 388.38, 387.311 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:29:52Z\",\n \"avg_ns\": 3583190286,\n \"stddev_ns\": 3907087,\n \"avg_ts\": 35.722385,\n \"stddev_ts\": 0.038956,\n \"samples_ns\": [ 3586768808, 3583779551, 3579022500 ],\n \"samples_ts\": [ 35.6867, 35.7165, 35.764 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:29:46Z", + "avg_ns": 1318911537, + "stddev_ns": 2766434, + "avg_ts": 388.200032, + "stddev_ts": 0.813645, + "samples_ns": [ + 1316504835, + 1318296186, + 1321933591 + ], + "samples_ts": [ + 388.909, + 388.38, + 387.311 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T00:29:52Z", + "avg_ns": 3583190286, + "stddev_ns": 3907087, + "avg_ts": 35.722385, + "stddev_ts": 0.038956, + "samples_ns": [ + 3586768808, + 3583779551, + 3579022500 + ], + "samples_ts": [ + 35.6867, + 35.7165, + 35.764 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 422 + }, + { + "timestamp_utc": "2025-12-09T00:30:53.105223+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:30:03Z\",\n \"avg_ns\": 1342082123,\n \"stddev_ns\": 1121497,\n \"avg_ts\": 381.496951,\n \"stddev_ts\": 0.318559,\n \"samples_ns\": [ 1343285306, 1341893665, 1341067399 ],\n \"samples_ts\": [ 381.155, 381.55, 381.785 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:30:09Z\",\n \"avg_ns\": 14638701117,\n \"stddev_ns\": 8164727,\n \"avg_ts\": 34.975788,\n \"stddev_ts\": 0.019504,\n \"samples_ns\": [ 14631574370, 14647609571, 14636919410 ],\n \"samples_ts\": [ 34.9928, 34.9545, 34.98 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:30:03Z", + "avg_ns": 1342082123, + "stddev_ns": 1121497, + "avg_ts": 381.496951, + "stddev_ts": 0.318559, + "samples_ns": [ + 1343285306, + 1341893665, + 1341067399 + ], + "samples_ts": [ + 381.155, + 381.55, + 381.785 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T00:30:09Z", + "avg_ns": 14638701117, + "stddev_ns": 8164727, + "avg_ts": 34.975788, + "stddev_ts": 0.019504, + "samples_ns": [ + 14631574370, + 14647609571, + 14636919410 + ], + "samples_ts": [ + 34.9928, + 34.9545, + 34.98 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 423 + }, + { + "timestamp_utc": "2025-12-09T00:31:06.057603+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:30:53Z\",\n \"avg_ns\": 322985019,\n \"stddev_ns\": 662143,\n \"avg_ts\": 396.304321,\n \"stddev_ts\": 0.812646,\n \"samples_ns\": [ 322298951, 323035791, 323620315 ],\n \"samples_ts\": [ 397.147, 396.241, 395.525 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:30:55Z\",\n \"avg_ns\": 3582985563,\n \"stddev_ns\": 2426822,\n \"avg_ts\": 35.724408,\n \"stddev_ts\": 0.024202,\n \"samples_ns\": [ 3580342979, 3583499370, 3585114340 ],\n \"samples_ts\": [ 35.7508, 35.7193, 35.7032 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:30:53Z", + "avg_ns": 322985019, + "stddev_ns": 662143, + "avg_ts": 396.304321, + "stddev_ts": 0.812646, + "samples_ns": [ + 322298951, + 323035791, + 323620315 + ], + "samples_ts": [ + 397.147, + 396.241, + 395.525 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T00:30:55Z", + "avg_ns": 3582985563, + "stddev_ns": 2426822, + "avg_ts": 35.724408, + "stddev_ts": 0.024202, + "samples_ns": [ + 3580342979, + 3583499370, + 3585114340 + ], + "samples_ts": [ + 35.7508, + 35.7193, + 35.7032 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 424 + }, + { + "timestamp_utc": "2025-12-09T00:31:51.997845+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:31:06Z\",\n \"avg_ns\": 322347331,\n \"stddev_ns\": 203983,\n \"avg_ts\": 397.087306,\n \"stddev_ts\": 0.249244,\n \"samples_ns\": [ 322200250, 322263584, 322578161 ],\n \"samples_ts\": [ 397.268, 397.19, 396.803 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:31:08Z\",\n \"avg_ns\": 14578013228,\n \"stddev_ns\": 16147001,\n \"avg_ts\": 35.121413,\n \"stddev_ts\": 0.038920,\n \"samples_ns\": [ 14559737966, 14583955927, 14590345793 ],\n \"samples_ts\": [ 35.1655, 35.1071, 35.0917 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:31:06Z", + "avg_ns": 322347331, + "stddev_ns": 203983, + "avg_ts": 397.087306, + "stddev_ts": 0.249244, + "samples_ns": [ + 322200250, + 322263584, + 322578161 + ], + "samples_ts": [ + 397.268, + 397.19, + 396.803 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T00:31:08Z", + "avg_ns": 14578013228, + "stddev_ns": 16147001, + "avg_ts": 35.121413, + "stddev_ts": 0.03892, + "samples_ns": [ + 14559737966, + 14583955927, + 14590345793 + ], + "samples_ts": [ + 35.1655, + 35.1071, + 35.0917 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 425 + }, + { + "timestamp_utc": "2025-12-09T00:32:09.253504+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:31:52Z\",\n \"avg_ns\": 1387063227,\n \"stddev_ns\": 4058953,\n \"avg_ts\": 369.127311,\n \"stddev_ts\": 1.078777,\n \"samples_ns\": [ 1385770819, 1383808024, 1391610839 ],\n \"samples_ts\": [ 369.469, 369.994, 367.919 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:31:58Z\",\n \"avg_ns\": 3598527334,\n \"stddev_ns\": 11692957,\n \"avg_ts\": 35.570356,\n \"stddev_ts\": 0.115374,\n \"samples_ns\": [ 3592922044, 3590692273, 3611967685 ],\n \"samples_ts\": [ 35.6256, 35.6477, 35.4377 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:31:52Z", + "avg_ns": 1387063227, + "stddev_ns": 4058953, + "avg_ts": 369.127311, + "stddev_ts": 1.078777, + "samples_ns": [ + 1385770819, + 1383808024, + 1391610839 + ], + "samples_ts": [ + 369.469, + 369.994, + 367.919 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T00:31:58Z", + "avg_ns": 3598527334, + "stddev_ns": 11692957, + "avg_ts": 35.570356, + "stddev_ts": 0.115374, + "samples_ns": [ + 3592922044, + 3590692273, + 3611967685 + ], + "samples_ts": [ + 35.6256, + 35.6477, + 35.4377 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 426 + }, + { + "timestamp_utc": "2025-12-09T00:32:59.821883+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:32:10Z\",\n \"avg_ns\": 1392945052,\n \"stddev_ns\": 2294719,\n \"avg_ts\": 367.567211,\n \"stddev_ts\": 0.606059,\n \"samples_ns\": [ 1390317711, 1394556289, 1393961156 ],\n \"samples_ts\": [ 368.261, 367.142, 367.299 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:32:15Z\",\n \"avg_ns\": 14688339633,\n \"stddev_ns\": 10850029,\n \"avg_ts\": 34.857594,\n \"stddev_ts\": 0.025737,\n \"samples_ns\": [ 14683411667, 14700778325, 14680828908 ],\n \"samples_ts\": [ 34.8693, 34.8281, 34.8754 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:32:10Z", + "avg_ns": 1392945052, + "stddev_ns": 2294719, + "avg_ts": 367.567211, + "stddev_ts": 0.606059, + "samples_ns": [ + 1390317711, + 1394556289, + 1393961156 + ], + "samples_ts": [ + 368.261, + 367.142, + 367.299 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T00:32:15Z", + "avg_ns": 14688339633, + "stddev_ns": 10850029, + "avg_ts": 34.857594, + "stddev_ts": 0.025737, + "samples_ns": [ + 14683411667, + 14700778325, + 14680828908 + ], + "samples_ts": [ + 34.8693, + 34.8281, + 34.8754 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 427 + }, + { + "timestamp_utc": "2025-12-09T00:33:12.823270+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:33:00Z\",\n \"avg_ns\": 324752071,\n \"stddev_ns\": 3224536,\n \"avg_ts\": 394.172766,\n \"stddev_ts\": 3.917837,\n \"samples_ns\": [ 327865403, 321426787, 324964023 ],\n \"samples_ts\": [ 390.404, 398.224, 393.89 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:33:01Z\",\n \"avg_ns\": 3587417776,\n \"stddev_ns\": 7276433,\n \"avg_ts\": 35.680358,\n \"stddev_ts\": 0.072350,\n \"samples_ns\": [ 3586790195, 3594987427, 3580475707 ],\n \"samples_ts\": [ 35.6865, 35.6051, 35.7494 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:33:00Z", + "avg_ns": 324752071, + "stddev_ns": 3224536, + "avg_ts": 394.172766, + "stddev_ts": 3.917837, + "samples_ns": [ + 327865403, + 321426787, + 324964023 + ], + "samples_ts": [ + 390.404, + 398.224, + 393.89 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T00:33:01Z", + "avg_ns": 3587417776, + "stddev_ns": 7276433, + "avg_ts": 35.680358, + "stddev_ts": 0.07235, + "samples_ns": [ + 3586790195, + 3594987427, + 3580475707 + ], + "samples_ts": [ + 35.6865, + 35.6051, + 35.7494 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 428 + }, + { + "timestamp_utc": "2025-12-09T00:33:58.705531+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:33:13Z\",\n \"avg_ns\": 324072172,\n \"stddev_ns\": 2585552,\n \"avg_ts\": 394.990434,\n \"stddev_ts\": 3.137343,\n \"samples_ns\": [ 327047860, 322374465, 322794191 ],\n \"samples_ts\": [ 391.38, 397.054, 396.537 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:33:14Z\",\n \"avg_ns\": 14558231212,\n \"stddev_ns\": 5728306,\n \"avg_ts\": 35.169111,\n \"stddev_ts\": 0.013838,\n \"samples_ns\": [ 14562543091, 14560417475, 14551733071 ],\n \"samples_ts\": [ 35.1587, 35.1638, 35.1848 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:33:13Z", + "avg_ns": 324072172, + "stddev_ns": 2585552, + "avg_ts": 394.990434, + "stddev_ts": 3.137343, + "samples_ns": [ + 327047860, + 322374465, + 322794191 + ], + "samples_ts": [ + 391.38, + 397.054, + 396.537 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T00:33:14Z", + "avg_ns": 14558231212, + "stddev_ns": 5728306, + "avg_ts": 35.169111, + "stddev_ts": 0.013838, + "samples_ns": [ + 14562543091, + 14560417475, + 14551733071 + ], + "samples_ts": [ + 35.1587, + 35.1638, + 35.1848 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 429 + }, + { + "timestamp_utc": "2025-12-09T00:34:16.548442+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:33:59Z\",\n \"avg_ns\": 1536336889,\n \"stddev_ns\": 1887398,\n \"avg_ts\": 333.260575,\n \"stddev_ts\": 0.409109,\n \"samples_ns\": [ 1536010064, 1534635071, 1538365534 ],\n \"samples_ts\": [ 333.331, 333.63, 332.821 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:34:05Z\",\n \"avg_ns\": 3596991173,\n \"stddev_ns\": 1549159,\n \"avg_ts\": 35.585302,\n \"stddev_ts\": 0.015327,\n \"samples_ns\": [ 3598427087, 3595349361, 3597197071 ],\n \"samples_ts\": [ 35.5711, 35.6015, 35.5833 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:33:59Z", + "avg_ns": 1536336889, + "stddev_ns": 1887398, + "avg_ts": 333.260575, + "stddev_ts": 0.409109, + "samples_ns": [ + 1536010064, + 1534635071, + 1538365534 + ], + "samples_ts": [ + 333.331, + 333.63, + 332.821 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T00:34:05Z", + "avg_ns": 3596991173, + "stddev_ns": 1549159, + "avg_ts": 35.585302, + "stddev_ts": 0.015327, + "samples_ns": [ + 3598427087, + 3595349361, + 3597197071 + ], + "samples_ts": [ + 35.5711, + 35.6015, + 35.5833 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 430 + }, + { + "timestamp_utc": "2025-12-09T00:35:07.755610+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:34:17Z\",\n \"avg_ns\": 1531468358,\n \"stddev_ns\": 1659091,\n \"avg_ts\": 334.319934,\n \"stddev_ts\": 0.362063,\n \"samples_ns\": [ 1531711224, 1529702151, 1532991701 ],\n \"samples_ts\": [ 334.267, 334.706, 333.987 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 270M Q2_K - Medium\",\n \"model_size\": 230552064,\n \"model_n_params\": 268098176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:34:23Z\",\n \"avg_ns\": 14721699820,\n \"stddev_ns\": 24190581,\n \"avg_ts\": 34.778655,\n \"stddev_ts\": 0.057095,\n \"samples_ns\": [ 14749530804, 14705723560, 14709845097 ],\n \"samples_ts\": [ 34.713, 34.8164, 34.8066 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:34:17Z", + "avg_ns": 1531468358, + "stddev_ns": 1659091, + "avg_ts": 334.319934, + "stddev_ts": 0.362063, + "samples_ns": [ + 1531711224, + 1529702151, + 1532991701 + ], + "samples_ts": [ + 334.267, + 334.706, + 333.987 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_type": "gemma3 270M Q2_K - Medium", + "model_size": 230552064, + "model_n_params": 268098176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T00:34:23Z", + "avg_ns": 14721699820, + "stddev_ns": 24190581, + "avg_ts": 34.778655, + "stddev_ts": 0.057095, + "samples_ns": [ + 14749530804, + 14705723560, + 14709845097 + ], + "samples_ts": [ + 34.713, + 34.8164, + 34.8066 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-270M-it-GGUF/gemma-3-270m-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-270M-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 431 + }, + { + "timestamp_utc": "2025-12-09T00:36:49.427521+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:35:17Z\",\n \"avg_ns\": 11022977017,\n \"stddev_ns\": 18402675,\n \"avg_ts\": 11.612130,\n \"stddev_ts\": 0.019376,\n \"samples_ns\": [ 11019054573, 11043024374, 11006852105 ],\n \"samples_ts\": [ 11.6162, 11.591, 11.6291 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:36:01Z\",\n \"avg_ns\": 15779086672,\n \"stddev_ns\": 11387911,\n \"avg_ts\": 8.112006,\n \"stddev_ts\": 0.005853,\n \"samples_ns\": [ 15777086004, 15791342339, 15768831673 ],\n \"samples_ts\": [ 8.11303, 8.10571, 8.11728 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:35:17Z", + "avg_ns": 11022977017, + "stddev_ns": 18402675, + "avg_ts": 11.61213, + "stddev_ts": 0.019376, + "samples_ns": [ + 11019054573, + 11043024374, + 11006852105 + ], + "samples_ts": [ + 11.6162, + 11.591, + 11.6291 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T00:36:01Z", + "avg_ns": 15779086672, + "stddev_ns": 11387911, + "avg_ts": 8.112006, + "stddev_ts": 0.005853, + "samples_ns": [ + 15777086004, + 15791342339, + 15768831673 + ], + "samples_ts": [ + 8.11303, + 8.10571, + 8.11728 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 432 + }, + { + "timestamp_utc": "2025-12-09T00:40:44.683544+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:36:50Z\",\n \"avg_ns\": 10979671982,\n \"stddev_ns\": 646581,\n \"avg_ts\": 11.657907,\n \"stddev_ts\": 0.000687,\n \"samples_ns\": [ 10980150916, 10979928534, 10978936496 ],\n \"samples_ts\": [ 11.6574, 11.6576, 11.6587 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:37:34Z\",\n \"avg_ns\": 63423023116,\n \"stddev_ns\": 2652065,\n \"avg_ts\": 8.072778,\n \"stddev_ts\": 0.000335,\n \"samples_ns\": [ 63420127605, 63425257393, 63423684352 ],\n \"samples_ts\": [ 8.07315, 8.07249, 8.07269 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:36:50Z", + "avg_ns": 10979671982, + "stddev_ns": 646581, + "avg_ts": 11.657907, + "stddev_ts": 0.000687, + "samples_ns": [ + 10980150916, + 10979928534, + 10978936496 + ], + "samples_ts": [ + 11.6574, + 11.6576, + 11.6587 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T00:37:34Z", + "avg_ns": 63423023116, + "stddev_ns": 2652065, + "avg_ts": 8.072778, + "stddev_ts": 0.000335, + "samples_ns": [ + 63420127605, + 63425257393, + 63423684352 + ], + "samples_ts": [ + 8.07315, + 8.07249, + 8.07269 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 433 + }, + { + "timestamp_utc": "2025-12-09T00:44:29.436553+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:40:45Z\",\n \"avg_ns\": 44157705793,\n \"stddev_ns\": 2531029,\n \"avg_ts\": 11.594805,\n \"stddev_ts\": 0.000660,\n \"samples_ns\": [ 44158536822, 44159698565, 44154881994 ],\n \"samples_ts\": [ 11.5946, 11.5943, 11.5955 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:43:42Z\",\n \"avg_ns\": 15690445977,\n \"stddev_ns\": 1688104,\n \"avg_ts\": 8.157831,\n \"stddev_ts\": 0.000873,\n \"samples_ns\": [ 15691912548, 15690810506, 15688614879 ],\n \"samples_ts\": [ 8.15707, 8.15764, 8.15878 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:40:45Z", + "avg_ns": 44157705793, + "stddev_ns": 2531029, + "avg_ts": 11.594805, + "stddev_ts": 0.00066, + "samples_ns": [ + 44158536822, + 44159698565, + 44154881994 + ], + "samples_ts": [ + 11.5946, + 11.5943, + 11.5955 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T00:43:42Z", + "avg_ns": 15690445977, + "stddev_ns": 1688104, + "avg_ts": 8.157831, + "stddev_ts": 0.000873, + "samples_ns": [ + 15691912548, + 15690810506, + 15688614879 + ], + "samples_ts": [ + 8.15707, + 8.15764, + 8.15878 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 434 + }, + { + "timestamp_utc": "2025-12-09T00:50:37.553793+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:44:30Z\",\n \"avg_ns\": 44174972181,\n \"stddev_ns\": 766610,\n \"avg_ts\": 11.590273,\n \"stddev_ts\": 0.000193,\n \"samples_ns\": [ 44174314821, 44175769289, 44174832434 ],\n \"samples_ts\": [ 11.5904, 11.5901, 11.5903 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:47:26Z\",\n \"avg_ns\": 63456755748,\n \"stddev_ns\": 1654665,\n \"avg_ts\": 8.068487,\n \"stddev_ts\": 0.000208,\n \"samples_ns\": [ 63458642734, 63455874898, 63455749613 ],\n \"samples_ts\": [ 8.06825, 8.0686, 8.06861 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:44:30Z", + "avg_ns": 44174972181, + "stddev_ns": 766610, + "avg_ts": 11.590273, + "stddev_ts": 0.000193, + "samples_ns": [ + 44174314821, + 44175769289, + 44174832434 + ], + "samples_ts": [ + 11.5904, + 11.5901, + 11.5903 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T00:47:26Z", + "avg_ns": 63456755748, + "stddev_ns": 1654665, + "avg_ts": 8.068487, + "stddev_ts": 0.000208, + "samples_ns": [ + 63458642734, + 63455874898, + 63455749613 + ], + "samples_ts": [ + 8.06825, + 8.0686, + 8.06861 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 435 + }, + { + "timestamp_utc": "2025-12-09T00:52:09.830068+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:50:38Z\",\n \"avg_ns\": 10979588178,\n \"stddev_ns\": 1091666,\n \"avg_ts\": 11.657997,\n \"stddev_ts\": 0.001148,\n \"samples_ns\": [ 10979294827, 10978683553, 10980786156 ],\n \"samples_ts\": [ 11.6583, 11.659, 11.6567 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:51:22Z\",\n \"avg_ns\": 15770652865,\n \"stddev_ns\": 591393,\n \"avg_ts\": 8.116341,\n \"stddev_ts\": 0.000304,\n \"samples_ns\": [ 15771273638, 15770096051, 15770588906 ],\n \"samples_ts\": [ 8.11602, 8.11663, 8.11637 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:50:38Z", + "avg_ns": 10979588178, + "stddev_ns": 1091666, + "avg_ts": 11.657997, + "stddev_ts": 0.001148, + "samples_ns": [ + 10979294827, + 10978683553, + 10980786156 + ], + "samples_ts": [ + 11.6583, + 11.659, + 11.6567 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T00:51:22Z", + "avg_ns": 15770652865, + "stddev_ns": 591393, + "avg_ts": 8.116341, + "stddev_ts": 0.000304, + "samples_ns": [ + 15771273638, + 15770096051, + 15770588906 + ], + "samples_ts": [ + 8.11602, + 8.11663, + 8.11637 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 436 + }, + { + "timestamp_utc": "2025-12-09T00:56:05.646109+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:52:10Z\",\n \"avg_ns\": 10983969630,\n \"stddev_ns\": 1034806,\n \"avg_ts\": 11.653346,\n \"stddev_ts\": 0.001087,\n \"samples_ns\": [ 10984072000, 10984938739, 10982898153 ],\n \"samples_ts\": [ 11.6532, 11.6523, 11.6545 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:52:54Z\",\n \"avg_ns\": 63609411151,\n \"stddev_ns\": 757466,\n \"avg_ts\": 8.049123,\n \"stddev_ts\": 0.000096,\n \"samples_ns\": [ 63609155414, 63610263384, 63608814655 ],\n \"samples_ts\": [ 8.04916, 8.04902, 8.0492 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:52:10Z", + "avg_ns": 10983969630, + "stddev_ns": 1034806, + "avg_ts": 11.653346, + "stddev_ts": 0.001087, + "samples_ns": [ + 10984072000, + 10984938739, + 10982898153 + ], + "samples_ts": [ + 11.6532, + 11.6523, + 11.6545 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T00:52:54Z", + "avg_ns": 63609411151, + "stddev_ns": 757466, + "avg_ts": 8.049123, + "stddev_ts": 9.6e-05, + "samples_ns": [ + 63609155414, + 63610263384, + 63608814655 + ], + "samples_ts": [ + 8.04916, + 8.04902, + 8.0492 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 437 + }, + { + "timestamp_utc": "2025-12-09T00:59:50.439141+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:56:06Z\",\n \"avg_ns\": 44163994825,\n \"stddev_ns\": 819910,\n \"avg_ts\": 11.593154,\n \"stddev_ts\": 0.000215,\n \"samples_ns\": [ 44164029762, 44164796709, 44163158004 ],\n \"samples_ts\": [ 11.5931, 11.5929, 11.5934 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:59:03Z\",\n \"avg_ns\": 15680538362,\n \"stddev_ns\": 485862,\n \"avg_ts\": 8.162985,\n \"stddev_ts\": 0.000244,\n \"samples_ns\": [ 15681078749, 15680305161, 15680231177 ],\n \"samples_ts\": [ 8.1627, 8.16311, 8.16314 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:56:06Z", + "avg_ns": 44163994825, + "stddev_ns": 819910, + "avg_ts": 11.593154, + "stddev_ts": 0.000215, + "samples_ns": [ + 44164029762, + 44164796709, + 44163158004 + ], + "samples_ts": [ + 11.5931, + 11.5929, + 11.5934 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T00:59:03Z", + "avg_ns": 15680538362, + "stddev_ns": 485862, + "avg_ts": 8.162985, + "stddev_ts": 0.000244, + "samples_ns": [ + 15681078749, + 15680305161, + 15680231177 + ], + "samples_ts": [ + 8.1627, + 8.16311, + 8.16314 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 438 + }, + { + "timestamp_utc": "2025-12-09T01:05:58.201115+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T00:59:51Z\",\n \"avg_ns\": 44172862291,\n \"stddev_ns\": 2964244,\n \"avg_ts\": 11.590827,\n \"stddev_ts\": 0.000778,\n \"samples_ns\": [ 44176252586, 44170759503, 44171574784 ],\n \"samples_ts\": [ 11.5899, 11.5914, 11.5912 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:02:48Z\",\n \"avg_ns\": 63324162500,\n \"stddev_ns\": 3417222,\n \"avg_ts\": 8.085381,\n \"stddev_ts\": 0.000434,\n \"samples_ns\": [ 63326973491, 63320385401, 63325128610 ],\n \"samples_ts\": [ 8.08502, 8.08586, 8.08526 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T00:59:51Z", + "avg_ns": 44172862291, + "stddev_ns": 2964244, + "avg_ts": 11.590827, + "stddev_ts": 0.000778, + "samples_ns": [ + 44176252586, + 44170759503, + 44171574784 + ], + "samples_ts": [ + 11.5899, + 11.5914, + 11.5912 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T01:02:48Z", + "avg_ns": 63324162500, + "stddev_ns": 3417222, + "avg_ts": 8.085381, + "stddev_ts": 0.000434, + "samples_ns": [ + 63326973491, + 63320385401, + 63325128610 + ], + "samples_ts": [ + 8.08502, + 8.08586, + 8.08526 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 439 + }, + { + "timestamp_utc": "2025-12-09T01:07:30.458184+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:05:59Z\",\n \"avg_ns\": 10979049225,\n \"stddev_ns\": 749696,\n \"avg_ts\": 11.658569,\n \"stddev_ts\": 0.000788,\n \"samples_ns\": [ 10979903103, 10978557203, 10978687370 ],\n \"samples_ts\": [ 11.6577, 11.6591, 11.659 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:06:42Z\",\n \"avg_ns\": 15760287870,\n \"stddev_ns\": 1339820,\n \"avg_ts\": 8.121679,\n \"stddev_ts\": 0.000687,\n \"samples_ns\": [ 15760078172, 15759071213, 15761714226 ],\n \"samples_ts\": [ 8.12179, 8.12231, 8.12094 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T01:05:59Z", + "avg_ns": 10979049225, + "stddev_ns": 749696, + "avg_ts": 11.658569, + "stddev_ts": 0.000788, + "samples_ns": [ + 10979903103, + 10978557203, + 10978687370 + ], + "samples_ts": [ + 11.6577, + 11.6591, + 11.659 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T01:06:42Z", + "avg_ns": 15760287870, + "stddev_ns": 1339820, + "avg_ts": 8.121679, + "stddev_ts": 0.000687, + "samples_ns": [ + 15760078172, + 15759071213, + 15761714226 + ], + "samples_ts": [ + 8.12179, + 8.12231, + 8.12094 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 440 + }, + { + "timestamp_utc": "2025-12-09T01:11:25.781124+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:07:31Z\",\n \"avg_ns\": 10978729294,\n \"stddev_ns\": 1182006,\n \"avg_ts\": 11.658909,\n \"stddev_ts\": 0.001245,\n \"samples_ns\": [ 10978667821, 10979931504, 10977588559 ],\n \"samples_ts\": [ 11.659, 11.6576, 11.6601 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:08:15Z\",\n \"avg_ns\": 63450406255,\n \"stddev_ns\": 2026402,\n \"avg_ts\": 8.069294,\n \"stddev_ts\": 0.000258,\n \"samples_ns\": [ 63448945424, 63449553703, 63452719638 ],\n \"samples_ts\": [ 8.06948, 8.0694, 8.069 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T01:07:31Z", + "avg_ns": 10978729294, + "stddev_ns": 1182006, + "avg_ts": 11.658909, + "stddev_ts": 0.001245, + "samples_ns": [ + 10978667821, + 10979931504, + 10977588559 + ], + "samples_ts": [ + 11.659, + 11.6576, + 11.6601 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T01:08:15Z", + "avg_ns": 63450406255, + "stddev_ns": 2026402, + "avg_ts": 8.069294, + "stddev_ts": 0.000258, + "samples_ns": [ + 63448945424, + 63449553703, + 63452719638 + ], + "samples_ts": [ + 8.06948, + 8.0694, + 8.069 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 441 + }, + { + "timestamp_utc": "2025-12-09T01:15:12.242188+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:11:26Z\",\n \"avg_ns\": 44571205395,\n \"stddev_ns\": 1041669,\n \"avg_ts\": 11.487237,\n \"stddev_ts\": 0.000257,\n \"samples_ns\": [ 44571430992, 44572071251, 44570113944 ],\n \"samples_ts\": [ 11.4872, 11.487, 11.4875 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:14:24Z\",\n \"avg_ns\": 15707462552,\n \"stddev_ns\": 1458659,\n \"avg_ts\": 8.148993,\n \"stddev_ts\": 0.000757,\n \"samples_ns\": [ 15707996633, 15708578897, 15705812126 ],\n \"samples_ts\": [ 8.14872, 8.14841, 8.14985 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T01:11:26Z", + "avg_ns": 44571205395, + "stddev_ns": 1041669, + "avg_ts": 11.487237, + "stddev_ts": 0.000257, + "samples_ns": [ + 44571430992, + 44572071251, + 44570113944 + ], + "samples_ts": [ + 11.4872, + 11.487, + 11.4875 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T01:14:24Z", + "avg_ns": 15707462552, + "stddev_ns": 1458659, + "avg_ts": 8.148993, + "stddev_ts": 0.000757, + "samples_ns": [ + 15707996633, + 15708578897, + 15705812126 + ], + "samples_ts": [ + 8.14872, + 8.14841, + 8.14985 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 442 + }, + { + "timestamp_utc": "2025-12-09T01:21:21.301399+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:15:13Z\",\n \"avg_ns\": 44528824822,\n \"stddev_ns\": 1838079,\n \"avg_ts\": 11.498170,\n \"stddev_ts\": 0.000468,\n \"samples_ns\": [ 44530120765, 44529601590, 44526752113 ],\n \"samples_ts\": [ 11.4978, 11.498, 11.4987 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:18:11Z\",\n \"avg_ns\": 63294550717,\n \"stddev_ns\": 3115813,\n \"avg_ts\": 8.089164,\n \"stddev_ts\": 0.000397,\n \"samples_ns\": [ 63298072551, 63293375048, 63292204553 ],\n \"samples_ts\": [ 8.08871, 8.08931, 8.08946 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T01:15:13Z", + "avg_ns": 44528824822, + "stddev_ns": 1838079, + "avg_ts": 11.49817, + "stddev_ts": 0.000468, + "samples_ns": [ + 44530120765, + 44529601590, + 44526752113 + ], + "samples_ts": [ + 11.4978, + 11.498, + 11.4987 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T01:18:11Z", + "avg_ns": 63294550717, + "stddev_ns": 3115813, + "avg_ts": 8.089164, + "stddev_ts": 0.000397, + "samples_ns": [ + 63298072551, + 63293375048, + 63292204553 + ], + "samples_ts": [ + 8.08871, + 8.08931, + 8.08946 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 443 + }, + { + "timestamp_utc": "2025-12-09T01:22:53.360610+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:21:22Z\",\n \"avg_ns\": 10979345018,\n \"stddev_ns\": 719707,\n \"avg_ts\": 11.658255,\n \"stddev_ts\": 0.000748,\n \"samples_ns\": [ 10978569601, 10979520440, 10979945015 ],\n \"samples_ts\": [ 11.6591, 11.6581, 11.6576 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:22:06Z\",\n \"avg_ns\": 15697248414,\n \"stddev_ns\": 1239686,\n \"avg_ts\": 8.154295,\n \"stddev_ts\": 0.000644,\n \"samples_ns\": [ 15698679318, 15696567717, 15696498207 ],\n \"samples_ts\": [ 8.15355, 8.15465, 8.15469 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T01:21:22Z", + "avg_ns": 10979345018, + "stddev_ns": 719707, + "avg_ts": 11.658255, + "stddev_ts": 0.000748, + "samples_ns": [ + 10978569601, + 10979520440, + 10979945015 + ], + "samples_ts": [ + 11.6591, + 11.6581, + 11.6576 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T01:22:06Z", + "avg_ns": 15697248414, + "stddev_ns": 1239686, + "avg_ts": 8.154295, + "stddev_ts": 0.000644, + "samples_ns": [ + 15698679318, + 15696567717, + 15696498207 + ], + "samples_ts": [ + 8.15355, + 8.15465, + 8.15469 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 444 + }, + { + "timestamp_utc": "2025-12-09T01:26:49.185757+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:22:54Z\",\n \"avg_ns\": 10978693419,\n \"stddev_ns\": 742099,\n \"avg_ts\": 11.658947,\n \"stddev_ts\": 0.000772,\n \"samples_ns\": [ 10979511414, 10978448492, 10978120353 ],\n \"samples_ts\": [ 11.6581, 11.6592, 11.6596 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:23:38Z\",\n \"avg_ns\": 63616565520,\n \"stddev_ns\": 4631889,\n \"avg_ts\": 8.048218,\n \"stddev_ts\": 0.000586,\n \"samples_ns\": [ 63621772655, 63615019643, 63612904262 ],\n \"samples_ts\": [ 8.04756, 8.04841, 8.04868 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T01:22:54Z", + "avg_ns": 10978693419, + "stddev_ns": 742099, + "avg_ts": 11.658947, + "stddev_ts": 0.000772, + "samples_ns": [ + 10979511414, + 10978448492, + 10978120353 + ], + "samples_ts": [ + 11.6581, + 11.6592, + 11.6596 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T01:23:38Z", + "avg_ns": 63616565520, + "stddev_ns": 4631889, + "avg_ts": 8.048218, + "stddev_ts": 0.000586, + "samples_ns": [ + 63621772655, + 63615019643, + 63612904262 + ], + "samples_ts": [ + 8.04756, + 8.04841, + 8.04868 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 445 + }, + { + "timestamp_utc": "2025-12-09T01:30:33.935965+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:26:50Z\",\n \"avg_ns\": 44165568390,\n \"stddev_ns\": 4523972,\n \"avg_ts\": 11.592741,\n \"stddev_ts\": 0.001187,\n \"samples_ns\": [ 44169857071, 44166007021, 44160841078 ],\n \"samples_ts\": [ 11.5916, 11.5926, 11.594 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:29:46Z\",\n \"avg_ns\": 15676105994,\n \"stddev_ns\": 1326118,\n \"avg_ts\": 8.165293,\n \"stddev_ts\": 0.000688,\n \"samples_ns\": [ 15677410860, 15676136137, 15674770986 ],\n \"samples_ts\": [ 8.16461, 8.16528, 8.16599 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T01:26:50Z", + "avg_ns": 44165568390, + "stddev_ns": 4523972, + "avg_ts": 11.592741, + "stddev_ts": 0.001187, + "samples_ns": [ + 44169857071, + 44166007021, + 44160841078 + ], + "samples_ts": [ + 11.5916, + 11.5926, + 11.594 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T01:29:46Z", + "avg_ns": 15676105994, + "stddev_ns": 1326118, + "avg_ts": 8.165293, + "stddev_ts": 0.000688, + "samples_ns": [ + 15677410860, + 15676136137, + 15674770986 + ], + "samples_ts": [ + 8.16461, + 8.16528, + 8.16599 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 446 + }, + { + "timestamp_utc": "2025-12-09T01:36:41.537505+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:30:34Z\",\n \"avg_ns\": 44161054705,\n \"stddev_ns\": 2043254,\n \"avg_ts\": 11.593926,\n \"stddev_ts\": 0.000531,\n \"samples_ns\": [ 44161274150, 44162957556, 44158932411 ],\n \"samples_ts\": [ 11.5939, 11.5934, 11.5945 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:33:31Z\",\n \"avg_ns\": 63301623875,\n \"stddev_ns\": 1815355,\n \"avg_ts\": 8.088260,\n \"stddev_ts\": 0.000232,\n \"samples_ns\": [ 63302427670, 63299545389, 63302898566 ],\n \"samples_ts\": [ 8.08816, 8.08853, 8.0881 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T01:30:34Z", + "avg_ns": 44161054705, + "stddev_ns": 2043254, + "avg_ts": 11.593926, + "stddev_ts": 0.000531, + "samples_ns": [ + 44161274150, + 44162957556, + 44158932411 + ], + "samples_ts": [ + 11.5939, + 11.5934, + 11.5945 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T01:33:31Z", + "avg_ns": 63301623875, + "stddev_ns": 1815355, + "avg_ts": 8.08826, + "stddev_ts": 0.000232, + "samples_ns": [ + 63302427670, + 63299545389, + 63302898566 + ], + "samples_ts": [ + 8.08816, + 8.08853, + 8.0881 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 447 + }, + { + "timestamp_utc": "2025-12-09T01:38:13.646315+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:36:42Z\",\n \"avg_ns\": 10974235568,\n \"stddev_ns\": 2413796,\n \"avg_ts\": 11.663683,\n \"stddev_ts\": 0.002565,\n \"samples_ns\": [ 10977009891, 10973080272, 10972616541 ],\n \"samples_ts\": [ 11.6607, 11.6649, 11.6654 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:37:26Z\",\n \"avg_ns\": 15718845871,\n \"stddev_ns\": 383875,\n \"avg_ts\": 8.143091,\n \"stddev_ts\": 0.000176,\n \"samples_ns\": [ 15719149137, 15718477570, 15718910908 ],\n \"samples_ts\": [ 8.14293, 8.14328, 8.14306 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T01:36:42Z", + "avg_ns": 10974235568, + "stddev_ns": 2413796, + "avg_ts": 11.663683, + "stddev_ts": 0.002565, + "samples_ns": [ + 10977009891, + 10973080272, + 10972616541 + ], + "samples_ts": [ + 11.6607, + 11.6649, + 11.6654 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T01:37:26Z", + "avg_ns": 15718845871, + "stddev_ns": 383875, + "avg_ts": 8.143091, + "stddev_ts": 0.000176, + "samples_ns": [ + 15719149137, + 15718477570, + 15718910908 + ], + "samples_ts": [ + 8.14293, + 8.14328, + 8.14306 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 448 + }, + { + "timestamp_utc": "2025-12-09T01:42:09.239337+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:38:14Z\",\n \"avg_ns\": 10978326198,\n \"stddev_ns\": 735799,\n \"avg_ts\": 11.659337,\n \"stddev_ts\": 0.000774,\n \"samples_ns\": [ 10977485386, 10978732517, 10978760692 ],\n \"samples_ts\": [ 11.6602, 11.6589, 11.6589 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:38:58Z\",\n \"avg_ns\": 63541656509,\n \"stddev_ns\": 1860566,\n \"avg_ts\": 8.057706,\n \"stddev_ts\": 0.000232,\n \"samples_ns\": [ 63542568044, 63542847392, 63539554093 ],\n \"samples_ts\": [ 8.05759, 8.05756, 8.05797 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T01:38:14Z", + "avg_ns": 10978326198, + "stddev_ns": 735799, + "avg_ts": 11.659337, + "stddev_ts": 0.000774, + "samples_ns": [ + 10977485386, + 10978732517, + 10978760692 + ], + "samples_ts": [ + 11.6602, + 11.6589, + 11.6589 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T01:38:58Z", + "avg_ns": 63541656509, + "stddev_ns": 1860566, + "avg_ts": 8.057706, + "stddev_ts": 0.000232, + "samples_ns": [ + 63542568044, + 63542847392, + 63539554093 + ], + "samples_ts": [ + 8.05759, + 8.05756, + 8.05797 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 449 + }, + { + "timestamp_utc": "2025-12-09T01:45:54.137055+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:42:10Z\",\n \"avg_ns\": 44186558829,\n \"stddev_ns\": 2420634,\n \"avg_ts\": 11.587234,\n \"stddev_ts\": 0.000632,\n \"samples_ns\": [ 44188307664, 44183807854, 44187560970 ],\n \"samples_ts\": [ 11.5868, 11.588, 11.587 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:45:06Z\",\n \"avg_ns\": 15698512609,\n \"stddev_ns\": 1309984,\n \"avg_ts\": 8.153639,\n \"stddev_ts\": 0.000680,\n \"samples_ns\": [ 15698008350, 15699999790, 15697529687 ],\n \"samples_ts\": [ 8.1539, 8.15287, 8.15415 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T01:42:10Z", + "avg_ns": 44186558829, + "stddev_ns": 2420634, + "avg_ts": 11.587234, + "stddev_ts": 0.000632, + "samples_ns": [ + 44188307664, + 44183807854, + 44187560970 + ], + "samples_ts": [ + 11.5868, + 11.588, + 11.587 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T01:45:06Z", + "avg_ns": 15698512609, + "stddev_ns": 1309984, + "avg_ts": 8.153639, + "stddev_ts": 0.00068, + "samples_ns": [ + 15698008350, + 15699999790, + 15697529687 + ], + "samples_ts": [ + 8.1539, + 8.15287, + 8.15415 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 450 + }, + { + "timestamp_utc": "2025-12-09T01:52:01.964938+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:45:54Z\",\n \"avg_ns\": 44168338621,\n \"stddev_ns\": 1302838,\n \"avg_ts\": 11.592014,\n \"stddev_ts\": 0.000342,\n \"samples_ns\": [ 44169773054, 44167228735, 44168014074 ],\n \"samples_ts\": [ 11.5916, 11.5923, 11.5921 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:48:51Z\",\n \"avg_ns\": 63363276963,\n \"stddev_ns\": 1722740,\n \"avg_ts\": 8.080390,\n \"stddev_ts\": 0.000220,\n \"samples_ns\": [ 63365014498, 63363246983, 63361569408 ],\n \"samples_ts\": [ 8.08017, 8.08039, 8.08061 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T01:45:54Z", + "avg_ns": 44168338621, + "stddev_ns": 1302838, + "avg_ts": 11.592014, + "stddev_ts": 0.000342, + "samples_ns": [ + 44169773054, + 44167228735, + 44168014074 + ], + "samples_ts": [ + 11.5916, + 11.5923, + 11.5921 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T01:48:51Z", + "avg_ns": 63363276963, + "stddev_ns": 1722740, + "avg_ts": 8.08039, + "stddev_ts": 0.00022, + "samples_ns": [ + 63365014498, + 63363246983, + 63361569408 + ], + "samples_ts": [ + 8.08017, + 8.08039, + 8.08061 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 451 + }, + { + "timestamp_utc": "2025-12-09T01:53:34.078626+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:52:02Z\",\n \"avg_ns\": 10970436941,\n \"stddev_ns\": 641953,\n \"avg_ts\": 11.667721,\n \"stddev_ts\": 0.000664,\n \"samples_ns\": [ 10970632074, 10970940712, 10969738039 ],\n \"samples_ts\": [ 11.6675, 11.6672, 11.6685 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:52:46Z\",\n \"avg_ns\": 15727028715,\n \"stddev_ns\": 3695887,\n \"avg_ts\": 8.138855,\n \"stddev_ts\": 0.001910,\n \"samples_ns\": [ 15727003641, 15730732819, 15723349687 ],\n \"samples_ts\": [ 8.13887, 8.13694, 8.14076 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T01:52:02Z", + "avg_ns": 10970436941, + "stddev_ns": 641953, + "avg_ts": 11.667721, + "stddev_ts": 0.000664, + "samples_ns": [ + 10970632074, + 10970940712, + 10969738039 + ], + "samples_ts": [ + 11.6675, + 11.6672, + 11.6685 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T01:52:46Z", + "avg_ns": 15727028715, + "stddev_ns": 3695887, + "avg_ts": 8.138855, + "stddev_ts": 0.00191, + "samples_ns": [ + 15727003641, + 15730732819, + 15723349687 + ], + "samples_ts": [ + 8.13887, + 8.13694, + 8.14076 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 452 + }, + { + "timestamp_utc": "2025-12-09T01:57:29.621592+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:53:34Z\",\n \"avg_ns\": 10977246463,\n \"stddev_ns\": 1289433,\n \"avg_ts\": 11.660483,\n \"stddev_ts\": 0.001370,\n \"samples_ns\": [ 10978498344, 10977318544, 10975922501 ],\n \"samples_ts\": [ 11.6592, 11.6604, 11.6619 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:54:18Z\",\n \"avg_ns\": 63525754857,\n \"stddev_ns\": 496945,\n \"avg_ts\": 8.059723,\n \"stddev_ts\": 0.000054,\n \"samples_ns\": [ 63525302297, 63526153824, 63525808451 ],\n \"samples_ts\": [ 8.05978, 8.05967, 8.05972 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T01:53:34Z", + "avg_ns": 10977246463, + "stddev_ns": 1289433, + "avg_ts": 11.660483, + "stddev_ts": 0.00137, + "samples_ns": [ + 10978498344, + 10977318544, + 10975922501 + ], + "samples_ts": [ + 11.6592, + 11.6604, + 11.6619 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T01:54:18Z", + "avg_ns": 63525754857, + "stddev_ns": 496945, + "avg_ts": 8.059723, + "stddev_ts": 5.4e-05, + "samples_ns": [ + 63525302297, + 63526153824, + 63525808451 + ], + "samples_ts": [ + 8.05978, + 8.05967, + 8.05972 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 453 + }, + { + "timestamp_utc": "2025-12-09T02:01:16.146035+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T01:57:30Z\",\n \"avg_ns\": 44526698171,\n \"stddev_ns\": 1585785,\n \"avg_ts\": 11.498719,\n \"stddev_ts\": 0.000406,\n \"samples_ns\": [ 44526425964, 44528388179, 44525280371 ],\n \"samples_ts\": [ 11.4988, 11.4983, 11.4991 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:00:28Z\",\n \"avg_ns\": 15767425865,\n \"stddev_ns\": 3553359,\n \"avg_ts\": 8.118003,\n \"stddev_ts\": 0.001829,\n \"samples_ns\": [ 15764715973, 15766112717, 15771448905 ],\n \"samples_ts\": [ 8.1194, 8.11868, 8.11593 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T01:57:30Z", + "avg_ns": 44526698171, + "stddev_ns": 1585785, + "avg_ts": 11.498719, + "stddev_ts": 0.000406, + "samples_ns": [ + 44526425964, + 44528388179, + 44525280371 + ], + "samples_ts": [ + 11.4988, + 11.4983, + 11.4991 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T02:00:28Z", + "avg_ns": 15767425865, + "stddev_ns": 3553359, + "avg_ts": 8.118003, + "stddev_ts": 0.001829, + "samples_ns": [ + 15764715973, + 15766112717, + 15771448905 + ], + "samples_ts": [ + 8.1194, + 8.11868, + 8.11593 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 454 + }, + { + "timestamp_utc": "2025-12-09T02:07:26.496787+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:01:17Z\",\n \"avg_ns\": 44540252571,\n \"stddev_ns\": 715265,\n \"avg_ts\": 11.495220,\n \"stddev_ts\": 0.000176,\n \"samples_ns\": [ 44540811745, 44539490741, 44540455228 ],\n \"samples_ts\": [ 11.4951, 11.4954, 11.4952 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:04:15Z\",\n \"avg_ns\": 63691607744,\n \"stddev_ns\": 16596049,\n \"avg_ts\": 8.038736,\n \"stddev_ts\": 0.002094,\n \"samples_ns\": [ 63686006871, 63678540789, 63710275574 ],\n \"samples_ts\": [ 8.03944, 8.04039, 8.03638 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T02:01:17Z", + "avg_ns": 44540252571, + "stddev_ns": 715265, + "avg_ts": 11.49522, + "stddev_ts": 0.000176, + "samples_ns": [ + 44540811745, + 44539490741, + 44540455228 + ], + "samples_ts": [ + 11.4951, + 11.4954, + 11.4952 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T02:04:15Z", + "avg_ns": 63691607744, + "stddev_ns": 16596049, + "avg_ts": 8.038736, + "stddev_ts": 0.002094, + "samples_ns": [ + 63686006871, + 63678540789, + 63710275574 + ], + "samples_ts": [ + 8.03944, + 8.04039, + 8.03638 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 455 + }, + { + "timestamp_utc": "2025-12-09T02:08:58.529578+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:07:27Z\",\n \"avg_ns\": 10976500971,\n \"stddev_ns\": 730455,\n \"avg_ts\": 11.661275,\n \"stddev_ts\": 0.000776,\n \"samples_ns\": [ 10976589477, 10977183141, 10975730295 ],\n \"samples_ts\": [ 11.6612, 11.6606, 11.6621 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:08:11Z\",\n \"avg_ns\": 15688250544,\n \"stddev_ns\": 1221037,\n \"avg_ts\": 8.158972,\n \"stddev_ts\": 0.000632,\n \"samples_ns\": [ 15689078081, 15688817404, 15686856148 ],\n \"samples_ts\": [ 8.15854, 8.15868, 8.1597 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T02:07:27Z", + "avg_ns": 10976500971, + "stddev_ns": 730455, + "avg_ts": 11.661275, + "stddev_ts": 0.000776, + "samples_ns": [ + 10976589477, + 10977183141, + 10975730295 + ], + "samples_ts": [ + 11.6612, + 11.6606, + 11.6621 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T02:08:11Z", + "avg_ns": 15688250544, + "stddev_ns": 1221037, + "avg_ts": 8.158972, + "stddev_ts": 0.000632, + "samples_ns": [ + 15689078081, + 15688817404, + 15686856148 + ], + "samples_ts": [ + 8.15854, + 8.15868, + 8.1597 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 456 + }, + { + "timestamp_utc": "2025-12-09T02:12:53.627042+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:08:59Z\",\n \"avg_ns\": 10983741346,\n \"stddev_ns\": 995257,\n \"avg_ts\": 11.653588,\n \"stddev_ts\": 0.001056,\n \"samples_ns\": [ 10983650384, 10984778962, 10982794692 ],\n \"samples_ts\": [ 11.6537, 11.6525, 11.6546 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:09:43Z\",\n \"avg_ns\": 63368580588,\n \"stddev_ns\": 1050229,\n \"avg_ts\": 8.079714,\n \"stddev_ts\": 0.000130,\n \"samples_ns\": [ 63367543161, 63368617200, 63369581404 ],\n \"samples_ts\": [ 8.07985, 8.07971, 8.07959 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T02:08:59Z", + "avg_ns": 10983741346, + "stddev_ns": 995257, + "avg_ts": 11.653588, + "stddev_ts": 0.001056, + "samples_ns": [ + 10983650384, + 10984778962, + 10982794692 + ], + "samples_ts": [ + 11.6537, + 11.6525, + 11.6546 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T02:09:43Z", + "avg_ns": 63368580588, + "stddev_ns": 1050229, + "avg_ts": 8.079714, + "stddev_ts": 0.00013, + "samples_ns": [ + 63367543161, + 63368617200, + 63369581404 + ], + "samples_ts": [ + 8.07985, + 8.07971, + 8.07959 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 457 + }, + { + "timestamp_utc": "2025-12-09T02:16:38.384516+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:12:54Z\",\n \"avg_ns\": 44164525338,\n \"stddev_ns\": 1681180,\n \"avg_ts\": 11.593015,\n \"stddev_ts\": 0.000441,\n \"samples_ns\": [ 44165851025, 44162634372, 44165090617 ],\n \"samples_ts\": [ 11.5927, 11.5935, 11.5929 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:15:51Z\",\n \"avg_ns\": 15681183575,\n \"stddev_ns\": 2198619,\n \"avg_ts\": 8.162649,\n \"stddev_ts\": 0.001141,\n \"samples_ns\": [ 15683462683, 15680996291, 15679091753 ],\n \"samples_ts\": [ 8.16146, 8.16275, 8.16374 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T02:12:54Z", + "avg_ns": 44164525338, + "stddev_ns": 1681180, + "avg_ts": 11.593015, + "stddev_ts": 0.000441, + "samples_ns": [ + 44165851025, + 44162634372, + 44165090617 + ], + "samples_ts": [ + 11.5927, + 11.5935, + 11.5929 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T02:15:51Z", + "avg_ns": 15681183575, + "stddev_ns": 2198619, + "avg_ts": 8.162649, + "stddev_ts": 0.001141, + "samples_ns": [ + 15683462683, + 15680996291, + 15679091753 + ], + "samples_ts": [ + 8.16146, + 8.16275, + 8.16374 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 458 + }, + { + "timestamp_utc": "2025-12-09T02:22:46.058379+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:16:39Z\",\n \"avg_ns\": 44157876857,\n \"stddev_ns\": 635881,\n \"avg_ts\": 11.594760,\n \"stddev_ts\": 0.000167,\n \"samples_ns\": [ 44157217823, 44158486733, 44157926015 ],\n \"samples_ts\": [ 11.5949, 11.5946, 11.5947 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:19:35Z\",\n \"avg_ns\": 63326367645,\n \"stddev_ns\": 2885835,\n \"avg_ts\": 8.085100,\n \"stddev_ts\": 0.000367,\n \"samples_ns\": [ 63323048503, 63328074017, 63327980416 ],\n \"samples_ts\": [ 8.08552, 8.08488, 8.08489 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T02:16:39Z", + "avg_ns": 44157876857, + "stddev_ns": 635881, + "avg_ts": 11.59476, + "stddev_ts": 0.000167, + "samples_ns": [ + 44157217823, + 44158486733, + 44157926015 + ], + "samples_ts": [ + 11.5949, + 11.5946, + 11.5947 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T02:19:35Z", + "avg_ns": 63326367645, + "stddev_ns": 2885835, + "avg_ts": 8.0851, + "stddev_ts": 0.000367, + "samples_ns": [ + 63323048503, + 63328074017, + 63327980416 + ], + "samples_ts": [ + 8.08552, + 8.08488, + 8.08489 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 459 + }, + { + "timestamp_utc": "2025-12-09T02:24:18.092813+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:22:46Z\",\n \"avg_ns\": 10980594625,\n \"stddev_ns\": 697070,\n \"avg_ts\": 11.656928,\n \"stddev_ts\": 0.000723,\n \"samples_ns\": [ 10980381973, 10981356719, 10980045185 ],\n \"samples_ts\": [ 11.6572, 11.6561, 11.6575 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:23:30Z\",\n \"avg_ns\": 15677851227,\n \"stddev_ns\": 625707,\n \"avg_ts\": 8.164384,\n \"stddev_ts\": 0.000326,\n \"samples_ns\": [ 15678571063, 15677545036, 15677437582 ],\n \"samples_ts\": [ 8.16401, 8.16454, 8.1646 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T02:22:46Z", + "avg_ns": 10980594625, + "stddev_ns": 697070, + "avg_ts": 11.656928, + "stddev_ts": 0.000723, + "samples_ns": [ + 10980381973, + 10981356719, + 10980045185 + ], + "samples_ts": [ + 11.6572, + 11.6561, + 11.6575 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T02:23:30Z", + "avg_ns": 15677851227, + "stddev_ns": 625707, + "avg_ts": 8.164384, + "stddev_ts": 0.000326, + "samples_ns": [ + 15678571063, + 15677545036, + 15677437582 + ], + "samples_ts": [ + 8.16401, + 8.16454, + 8.1646 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 460 + }, + { + "timestamp_utc": "2025-12-09T02:28:13.031218+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:24:18Z\",\n \"avg_ns\": 10976604209,\n \"stddev_ns\": 1112389,\n \"avg_ts\": 11.661166,\n \"stddev_ts\": 0.001171,\n \"samples_ns\": [ 10975744143, 10977847060, 10976221426 ],\n \"samples_ts\": [ 11.6621, 11.6598, 11.6616 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:25:02Z\",\n \"avg_ns\": 63324478218,\n \"stddev_ns\": 1082122,\n \"avg_ts\": 8.085341,\n \"stddev_ts\": 0.000138,\n \"samples_ns\": [ 63324848100, 63325326902, 63323259652 ],\n \"samples_ts\": [ 8.08529, 8.08523, 8.0855 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T02:24:18Z", + "avg_ns": 10976604209, + "stddev_ns": 1112389, + "avg_ts": 11.661166, + "stddev_ts": 0.001171, + "samples_ns": [ + 10975744143, + 10977847060, + 10976221426 + ], + "samples_ts": [ + 11.6621, + 11.6598, + 11.6616 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T02:25:02Z", + "avg_ns": 63324478218, + "stddev_ns": 1082122, + "avg_ts": 8.085341, + "stddev_ts": 0.000138, + "samples_ns": [ + 63324848100, + 63325326902, + 63323259652 + ], + "samples_ts": [ + 8.08529, + 8.08523, + 8.0855 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 461 + }, + { + "timestamp_utc": "2025-12-09T02:31:57.955751+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:28:13Z\",\n \"avg_ns\": 44175735318,\n \"stddev_ns\": 3067510,\n \"avg_ts\": 11.590073,\n \"stddev_ts\": 0.000805,\n \"samples_ns\": [ 44178958542, 44175395611, 44172851801 ],\n \"samples_ts\": [ 11.5892, 11.5902, 11.5908 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:31:10Z\",\n \"avg_ns\": 15689176530,\n \"stddev_ns\": 3102817,\n \"avg_ts\": 8.158491,\n \"stddev_ts\": 0.001612,\n \"samples_ns\": [ 15686557076, 15692599461, 15688373054 ],\n \"samples_ts\": [ 8.15985, 8.15671, 8.15891 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T02:28:13Z", + "avg_ns": 44175735318, + "stddev_ns": 3067510, + "avg_ts": 11.590073, + "stddev_ts": 0.000805, + "samples_ns": [ + 44178958542, + 44175395611, + 44172851801 + ], + "samples_ts": [ + 11.5892, + 11.5902, + 11.5908 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T02:31:10Z", + "avg_ns": 15689176530, + "stddev_ns": 3102817, + "avg_ts": 8.158491, + "stddev_ts": 0.001612, + "samples_ns": [ + 15686557076, + 15692599461, + 15688373054 + ], + "samples_ts": [ + 8.15985, + 8.15671, + 8.15891 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 462 + }, + { + "timestamp_utc": "2025-12-09T02:38:06.165686+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:31:58Z\",\n \"avg_ns\": 44197833817,\n \"stddev_ns\": 1676768,\n \"avg_ts\": 11.584278,\n \"stddev_ts\": 0.000439,\n \"samples_ns\": [ 44199191473, 44198350452, 44195959526 ],\n \"samples_ts\": [ 11.5839, 11.5841, 11.5848 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:34:55Z\",\n \"avg_ns\": 63452033611,\n \"stddev_ns\": 6360740,\n \"avg_ts\": 8.069087,\n \"stddev_ts\": 0.000808,\n \"samples_ns\": [ 63459366340, 63448441000, 63448293495 ],\n \"samples_ts\": [ 8.06815, 8.06954, 8.06956 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T02:31:58Z", + "avg_ns": 44197833817, + "stddev_ns": 1676768, + "avg_ts": 11.584278, + "stddev_ts": 0.000439, + "samples_ns": [ + 44199191473, + 44198350452, + 44195959526 + ], + "samples_ts": [ + 11.5839, + 11.5841, + 11.5848 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T02:34:55Z", + "avg_ns": 63452033611, + "stddev_ns": 6360740, + "avg_ts": 8.069087, + "stddev_ts": 0.000808, + "samples_ns": [ + 63459366340, + 63448441000, + 63448293495 + ], + "samples_ts": [ + 8.06815, + 8.06954, + 8.06956 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 463 + }, + { + "timestamp_utc": "2025-12-09T02:39:38.285290+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:38:07Z\",\n \"avg_ns\": 10979814219,\n \"stddev_ns\": 535662,\n \"avg_ts\": 11.657756,\n \"stddev_ts\": 0.000547,\n \"samples_ns\": [ 10980406262, 10979563909, 10979472488 ],\n \"samples_ts\": [ 11.6571, 11.658, 11.6581 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:38:51Z\",\n \"avg_ns\": 15676503063,\n \"stddev_ns\": 1215501,\n \"avg_ts\": 8.165086,\n \"stddev_ts\": 0.000626,\n \"samples_ns\": [ 15676364958, 15677768689, 15675375544 ],\n \"samples_ts\": [ 8.16516, 8.16443, 8.16567 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T02:38:07Z", + "avg_ns": 10979814219, + "stddev_ns": 535662, + "avg_ts": 11.657756, + "stddev_ts": 0.000547, + "samples_ns": [ + 10980406262, + 10979563909, + 10979472488 + ], + "samples_ts": [ + 11.6571, + 11.658, + 11.6581 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T02:38:51Z", + "avg_ns": 15676503063, + "stddev_ns": 1215501, + "avg_ts": 8.165086, + "stddev_ts": 0.000626, + "samples_ns": [ + 15676364958, + 15677768689, + 15675375544 + ], + "samples_ts": [ + 8.16516, + 8.16443, + 8.16567 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 464 + }, + { + "timestamp_utc": "2025-12-09T02:43:34.189464+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:39:39Z\",\n \"avg_ns\": 10974300323,\n \"stddev_ns\": 1327485,\n \"avg_ts\": 11.663614,\n \"stddev_ts\": 0.001411,\n \"samples_ns\": [ 10972801343, 10974772304, 10975327322 ],\n \"samples_ts\": [ 11.6652, 11.6631, 11.6625 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:40:23Z\",\n \"avg_ns\": 63646900098,\n \"stddev_ns\": 2267497,\n \"avg_ts\": 8.044382,\n \"stddev_ts\": 0.000287,\n \"samples_ns\": [ 63648243490, 63644282119, 63648174685 ],\n \"samples_ts\": [ 8.04421, 8.04471, 8.04422 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T02:39:39Z", + "avg_ns": 10974300323, + "stddev_ns": 1327485, + "avg_ts": 11.663614, + "stddev_ts": 0.001411, + "samples_ns": [ + 10972801343, + 10974772304, + 10975327322 + ], + "samples_ts": [ + 11.6652, + 11.6631, + 11.6625 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T02:40:23Z", + "avg_ns": 63646900098, + "stddev_ns": 2267497, + "avg_ts": 8.044382, + "stddev_ts": 0.000287, + "samples_ns": [ + 63648243490, + 63644282119, + 63648174685 + ], + "samples_ts": [ + 8.04421, + 8.04471, + 8.04422 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 465 + }, + { + "timestamp_utc": "2025-12-09T02:47:25.015848+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:43:35Z\",\n \"avg_ns\": 45544316214,\n \"stddev_ns\": 2220396,\n \"avg_ts\": 11.241798,\n \"stddev_ts\": 0.000548,\n \"samples_ns\": [ 45544111103, 45546632049, 45542205490 ],\n \"samples_ts\": [ 11.2418, 11.2412, 11.2423 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:46:37Z\",\n \"avg_ns\": 15851901873,\n \"stddev_ns\": 228136909,\n \"avg_ts\": 8.075847,\n \"stddev_ts\": 0.115268,\n \"samples_ns\": [ 16115330494, 15719501563, 15720873562 ],\n \"samples_ts\": [ 7.94275, 8.14275, 8.14204 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T02:43:35Z", + "avg_ns": 45544316214, + "stddev_ns": 2220396, + "avg_ts": 11.241798, + "stddev_ts": 0.000548, + "samples_ns": [ + 45544111103, + 45546632049, + 45542205490 + ], + "samples_ts": [ + 11.2418, + 11.2412, + 11.2423 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T02:46:37Z", + "avg_ns": 15851901873, + "stddev_ns": 228136909, + "avg_ts": 8.075847, + "stddev_ts": 0.115268, + "samples_ns": [ + 16115330494, + 15719501563, + 15720873562 + ], + "samples_ts": [ + 7.94275, + 8.14275, + 8.14204 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 466 + }, + { + "timestamp_utc": "2025-12-09T02:53:34.810338+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:47:25Z\",\n \"avg_ns\": 44549294304,\n \"stddev_ns\": 612582,\n \"avg_ts\": 11.492887,\n \"stddev_ts\": 0.000138,\n \"samples_ns\": [ 44548736911, 44549803529, 44549342474 ],\n \"samples_ts\": [ 11.493, 11.4928, 11.4929 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:50:24Z\",\n \"avg_ns\": 63504790957,\n \"stddev_ns\": 12175827,\n \"avg_ts\": 8.062384,\n \"stddev_ts\": 0.001545,\n \"samples_ns\": [ 63518725324, 63499404546, 63496243003 ],\n \"samples_ts\": [ 8.06062, 8.06307, 8.06347 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T02:47:25Z", + "avg_ns": 44549294304, + "stddev_ns": 612582, + "avg_ts": 11.492887, + "stddev_ts": 0.000138, + "samples_ns": [ + 44548736911, + 44549803529, + 44549342474 + ], + "samples_ts": [ + 11.493, + 11.4928, + 11.4929 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T02:50:24Z", + "avg_ns": 63504790957, + "stddev_ns": 12175827, + "avg_ts": 8.062384, + "stddev_ts": 0.001545, + "samples_ns": [ + 63518725324, + 63499404546, + 63496243003 + ], + "samples_ts": [ + 8.06062, + 8.06307, + 8.06347 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 467 + }, + { + "timestamp_utc": "2025-12-09T02:54:29.170758+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:53:35Z\",\n \"avg_ns\": 5520131286,\n \"stddev_ns\": 1778695,\n \"avg_ts\": 23.187856,\n \"stddev_ts\": 0.007471,\n \"samples_ns\": [ 5522067445, 5519756690, 5518569723 ],\n \"samples_ts\": [ 23.1797, 23.1894, 23.1944 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:53:57Z\",\n \"avg_ns\": 10421636577,\n \"stddev_ns\": 1791163,\n \"avg_ts\": 12.282140,\n \"stddev_ts\": 0.002107,\n \"samples_ns\": [ 10423700035, 10420671567, 10420538130 ],\n \"samples_ts\": [ 12.2797, 12.2833, 12.2834 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T02:53:35Z", + "avg_ns": 5520131286, + "stddev_ns": 1778695, + "avg_ts": 23.187856, + "stddev_ts": 0.007471, + "samples_ns": [ + 5522067445, + 5519756690, + 5518569723 + ], + "samples_ts": [ + 23.1797, + 23.1894, + 23.1944 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T02:53:57Z", + "avg_ns": 10421636577, + "stddev_ns": 1791163, + "avg_ts": 12.28214, + "stddev_ts": 0.002107, + "samples_ns": [ + 10423700035, + 10420671567, + 10420538130 + ], + "samples_ts": [ + 12.2797, + 12.2833, + 12.2834 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 468 + }, + { + "timestamp_utc": "2025-12-09T02:56:57.623923+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:54:30Z\",\n \"avg_ns\": 5519976204,\n \"stddev_ns\": 1218462,\n \"avg_ts\": 23.188507,\n \"stddev_ts\": 0.005109,\n \"samples_ns\": [ 5518992583, 5519599972, 5521336058 ],\n \"samples_ts\": [ 23.1926, 23.1901, 23.1828 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:54:52Z\",\n \"avg_ns\": 41779465276,\n \"stddev_ns\": 3653125,\n \"avg_ts\": 12.254824,\n \"stddev_ts\": 0.001071,\n \"samples_ns\": [ 41783635159, 41777932043, 41776828626 ],\n \"samples_ts\": [ 12.2536, 12.2553, 12.2556 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T02:54:30Z", + "avg_ns": 5519976204, + "stddev_ns": 1218462, + "avg_ts": 23.188507, + "stddev_ts": 0.005109, + "samples_ns": [ + 5518992583, + 5519599972, + 5521336058 + ], + "samples_ts": [ + 23.1926, + 23.1901, + 23.1828 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T02:54:52Z", + "avg_ns": 41779465276, + "stddev_ns": 3653125, + "avg_ts": 12.254824, + "stddev_ts": 0.001071, + "samples_ns": [ + 41783635159, + 41777932043, + 41776828626 + ], + "samples_ts": [ + 12.2536, + 12.2553, + 12.2556 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 469 + }, + { + "timestamp_utc": "2025-12-09T02:58:59.484377+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:56:58Z\",\n \"avg_ns\": 22364201636,\n \"stddev_ns\": 151702901,\n \"avg_ts\": 22.894435,\n \"stddev_ts\": 0.155910,\n \"samples_ns\": [ 22452422481, 22451150589, 22189031839 ],\n \"samples_ts\": [ 22.8038, 22.8051, 23.0745 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:58:28Z\",\n \"avg_ns\": 10424638889,\n \"stddev_ns\": 2710395,\n \"avg_ts\": 12.278603,\n \"stddev_ts\": 0.003188,\n \"samples_ns\": [ 10421982594, 10427393033, 10424541042 ],\n \"samples_ts\": [ 12.2817, 12.2754, 12.2787 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T02:56:58Z", + "avg_ns": 22364201636, + "stddev_ns": 151702901, + "avg_ts": 22.894435, + "stddev_ts": 0.15591, + "samples_ns": [ + 22452422481, + 22451150589, + 22189031839 + ], + "samples_ts": [ + 22.8038, + 22.8051, + 23.0745 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T02:58:28Z", + "avg_ns": 10424638889, + "stddev_ns": 2710395, + "avg_ts": 12.278603, + "stddev_ts": 0.003188, + "samples_ns": [ + 10421982594, + 10427393033, + 10424541042 + ], + "samples_ts": [ + 12.2817, + 12.2754, + 12.2787 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 470 + }, + { + "timestamp_utc": "2025-12-09T03:02:34.447478+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T02:59:00Z\",\n \"avg_ns\": 22174778786,\n \"stddev_ns\": 1052799,\n \"avg_ts\": 23.089295,\n \"stddev_ts\": 0.001085,\n \"samples_ns\": [ 22173577030, 22175324447, 22175434882 ],\n \"samples_ts\": [ 23.0905, 23.0887, 23.0886 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:00:29Z\",\n \"avg_ns\": 41748720987,\n \"stddev_ns\": 4381408,\n \"avg_ts\": 12.263849,\n \"stddev_ts\": 0.001287,\n \"samples_ns\": [ 41750582193, 41743716237, 41751864531 ],\n \"samples_ts\": [ 12.2633, 12.2653, 12.2629 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T02:59:00Z", + "avg_ns": 22174778786, + "stddev_ns": 1052799, + "avg_ts": 23.089295, + "stddev_ts": 0.001085, + "samples_ns": [ + 22173577030, + 22175324447, + 22175434882 + ], + "samples_ts": [ + 23.0905, + 23.0887, + 23.0886 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T03:00:29Z", + "avg_ns": 41748720987, + "stddev_ns": 4381408, + "avg_ts": 12.263849, + "stddev_ts": 0.001287, + "samples_ns": [ + 41750582193, + 41743716237, + 41751864531 + ], + "samples_ts": [ + 12.2633, + 12.2653, + 12.2629 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 471 + }, + { + "timestamp_utc": "2025-12-09T03:03:28.589447+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:02:35Z\",\n \"avg_ns\": 5520821691,\n \"stddev_ns\": 308122,\n \"avg_ts\": 23.184955,\n \"stddev_ts\": 0.001294,\n \"samples_ns\": [ 5520490976, 5520873426, 5521100671 ],\n \"samples_ts\": [ 23.1863, 23.1847, 23.1838 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:02:57Z\",\n \"avg_ns\": 10347088549,\n \"stddev_ns\": 2604161,\n \"avg_ts\": 12.370630,\n \"stddev_ts\": 0.003113,\n \"samples_ns\": [ 10346883923, 10349788987, 10344592737 ],\n \"samples_ts\": [ 12.3709, 12.3674, 12.3736 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T03:02:35Z", + "avg_ns": 5520821691, + "stddev_ns": 308122, + "avg_ts": 23.184955, + "stddev_ts": 0.001294, + "samples_ns": [ + 5520490976, + 5520873426, + 5521100671 + ], + "samples_ts": [ + 23.1863, + 23.1847, + 23.1838 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T03:02:57Z", + "avg_ns": 10347088549, + "stddev_ns": 2604161, + "avg_ts": 12.37063, + "stddev_ts": 0.003113, + "samples_ns": [ + 10346883923, + 10349788987, + 10344592737 + ], + "samples_ts": [ + 12.3709, + 12.3674, + 12.3736 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 472 + }, + { + "timestamp_utc": "2025-12-09T03:05:57.047274+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:03:29Z\",\n \"avg_ns\": 5523392285,\n \"stddev_ns\": 349905,\n \"avg_ts\": 23.174164,\n \"stddev_ts\": 0.001400,\n \"samples_ns\": [ 5523313386, 5523758413, 5523105058 ],\n \"samples_ts\": [ 23.1745, 23.1726, 23.1754 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:03:51Z\",\n \"avg_ns\": 41780695162,\n \"stddev_ns\": 2286212,\n \"avg_ts\": 12.254463,\n \"stddev_ts\": 0.000665,\n \"samples_ns\": [ 41780334272, 41778629383, 41783121833 ],\n \"samples_ts\": [ 12.2546, 12.2551, 12.2538 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T03:03:29Z", + "avg_ns": 5523392285, + "stddev_ns": 349905, + "avg_ts": 23.174164, + "stddev_ts": 0.0014, + "samples_ns": [ + 5523313386, + 5523758413, + 5523105058 + ], + "samples_ts": [ + 23.1745, + 23.1726, + 23.1754 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T03:03:51Z", + "avg_ns": 41780695162, + "stddev_ns": 2286212, + "avg_ts": 12.254463, + "stddev_ts": 0.000665, + "samples_ns": [ + 41780334272, + 41778629383, + 41783121833 + ], + "samples_ts": [ + 12.2546, + 12.2551, + 12.2538 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 473 + }, + { + "timestamp_utc": "2025-12-09T03:07:57.923076+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:05:57Z\",\n \"avg_ns\": 22195151946,\n \"stddev_ns\": 480151,\n \"avg_ts\": 23.068101,\n \"stddev_ts\": 0.000499,\n \"samples_ns\": [ 22195594769, 22194641612, 22195219457 ],\n \"samples_ts\": [ 23.0676, 23.0686, 23.068 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:07:26Z\",\n \"avg_ns\": 10350483109,\n \"stddev_ns\": 865842,\n \"avg_ts\": 12.366573,\n \"stddev_ts\": 0.001035,\n \"samples_ns\": [ 10349485336, 10351036947, 10350927044 ],\n \"samples_ts\": [ 12.3678, 12.3659, 12.366 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T03:05:57Z", + "avg_ns": 22195151946, + "stddev_ns": 480151, + "avg_ts": 23.068101, + "stddev_ts": 0.000499, + "samples_ns": [ + 22195594769, + 22194641612, + 22195219457 + ], + "samples_ts": [ + 23.0676, + 23.0686, + 23.068 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T03:07:26Z", + "avg_ns": 10350483109, + "stddev_ns": 865842, + "avg_ts": 12.366573, + "stddev_ts": 0.001035, + "samples_ns": [ + 10349485336, + 10351036947, + 10350927044 + ], + "samples_ts": [ + 12.3678, + 12.3659, + 12.366 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 474 + }, + { + "timestamp_utc": "2025-12-09T03:11:33.070842+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:07:58Z\",\n \"avg_ns\": 22201611514,\n \"stddev_ns\": 519614,\n \"avg_ts\": 23.061389,\n \"stddev_ts\": 0.000517,\n \"samples_ns\": [ 22202034612, 22201736921, 22201063010 ],\n \"samples_ts\": [ 23.0609, 23.0613, 23.062 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:09:27Z\",\n \"avg_ns\": 41772007109,\n \"stddev_ns\": 640441,\n \"avg_ts\": 12.257012,\n \"stddev_ts\": 0.000178,\n \"samples_ns\": [ 41772699056, 41771757578, 41771564694 ],\n \"samples_ts\": [ 12.2568, 12.2571, 12.2571 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T03:07:58Z", + "avg_ns": 22201611514, + "stddev_ns": 519614, + "avg_ts": 23.061389, + "stddev_ts": 0.000517, + "samples_ns": [ + 22202034612, + 22201736921, + 22201063010 + ], + "samples_ts": [ + 23.0609, + 23.0613, + 23.062 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T03:09:27Z", + "avg_ns": 41772007109, + "stddev_ns": 640441, + "avg_ts": 12.257012, + "stddev_ts": 0.000178, + "samples_ns": [ + 41772699056, + 41771757578, + 41771564694 + ], + "samples_ts": [ + 12.2568, + 12.2571, + 12.2571 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 475 + }, + { + "timestamp_utc": "2025-12-09T03:12:27.265566+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:11:33Z\",\n \"avg_ns\": 5521534342,\n \"stddev_ns\": 977568,\n \"avg_ts\": 23.181963,\n \"stddev_ts\": 0.004080,\n \"samples_ns\": [ 5521025060, 5522655050, 5520922918 ],\n \"samples_ts\": [ 23.1841, 23.1773, 23.1845 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:11:56Z\",\n \"avg_ns\": 10349197683,\n \"stddev_ns\": 4729938,\n \"avg_ts\": 12.368110,\n \"stddev_ts\": 0.005650,\n \"samples_ns\": [ 10354155323, 10348698253, 10344739475 ],\n \"samples_ts\": [ 12.3622, 12.3687, 12.3734 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T03:11:33Z", + "avg_ns": 5521534342, + "stddev_ns": 977568, + "avg_ts": 23.181963, + "stddev_ts": 0.00408, + "samples_ns": [ + 5521025060, + 5522655050, + 5520922918 + ], + "samples_ts": [ + 23.1841, + 23.1773, + 23.1845 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T03:11:56Z", + "avg_ns": 10349197683, + "stddev_ns": 4729938, + "avg_ts": 12.36811, + "stddev_ts": 0.00565, + "samples_ns": [ + 10354155323, + 10348698253, + 10344739475 + ], + "samples_ts": [ + 12.3622, + 12.3687, + 12.3734 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 476 + }, + { + "timestamp_utc": "2025-12-09T03:14:56.938150+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:12:28Z\",\n \"avg_ns\": 5518989705,\n \"stddev_ns\": 92315,\n \"avg_ts\": 23.192651,\n \"stddev_ts\": 0.000388,\n \"samples_ns\": [ 5519039914, 5518883167, 5519046034 ],\n \"samples_ts\": [ 23.1924, 23.1931, 23.1924 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:12:50Z\",\n \"avg_ns\": 42189465564,\n \"stddev_ns\": 84884547,\n \"avg_ts\": 12.135764,\n \"stddev_ts\": 0.024389,\n \"samples_ns\": [ 42287473434, 42141531799, 42139391461 ],\n \"samples_ts\": [ 12.1076, 12.1495, 12.1502 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T03:12:28Z", + "avg_ns": 5518989705, + "stddev_ns": 92315, + "avg_ts": 23.192651, + "stddev_ts": 0.000388, + "samples_ns": [ + 5519039914, + 5518883167, + 5519046034 + ], + "samples_ts": [ + 23.1924, + 23.1931, + 23.1924 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T03:12:50Z", + "avg_ns": 42189465564, + "stddev_ns": 84884547, + "avg_ts": 12.135764, + "stddev_ts": 0.024389, + "samples_ns": [ + 42287473434, + 42141531799, + 42139391461 + ], + "samples_ts": [ + 12.1076, + 12.1495, + 12.1502 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 477 + }, + { + "timestamp_utc": "2025-12-09T03:16:58.825440+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:14:57Z\",\n \"avg_ns\": 22433895618,\n \"stddev_ns\": 338324,\n \"avg_ts\": 22.822608,\n \"stddev_ts\": 0.000309,\n \"samples_ns\": [ 22433903127, 22434195158, 22433588570 ],\n \"samples_ts\": [ 22.8226, 22.8223, 22.8229 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:16:27Z\",\n \"avg_ns\": 10375395120,\n \"stddev_ns\": 3756267,\n \"avg_ts\": 12.336881,\n \"stddev_ts\": 0.004465,\n \"samples_ns\": [ 10371233080, 10376423889, 10378528392 ],\n \"samples_ts\": [ 12.3418, 12.3357, 12.3332 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T03:14:57Z", + "avg_ns": 22433895618, + "stddev_ns": 338324, + "avg_ts": 22.822608, + "stddev_ts": 0.000309, + "samples_ns": [ + 22433903127, + 22434195158, + 22433588570 + ], + "samples_ts": [ + 22.8226, + 22.8223, + 22.8229 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T03:16:27Z", + "avg_ns": 10375395120, + "stddev_ns": 3756267, + "avg_ts": 12.336881, + "stddev_ts": 0.004465, + "samples_ns": [ + 10371233080, + 10376423889, + 10378528392 + ], + "samples_ts": [ + 12.3418, + 12.3357, + 12.3332 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 478 + }, + { + "timestamp_utc": "2025-12-09T03:20:34.942870+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:16:59Z\",\n \"avg_ns\": 22424362383,\n \"stddev_ns\": 179502,\n \"avg_ts\": 22.832310,\n \"stddev_ts\": 0.000101,\n \"samples_ns\": [ 22424446784, 22424386923, 22424253443 ],\n \"samples_ts\": [ 22.8322, 22.8323, 22.8324 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:18:29Z\",\n \"avg_ns\": 41780967167,\n \"stddev_ns\": 6714366,\n \"avg_ts\": 12.254384,\n \"stddev_ts\": 0.001969,\n \"samples_ns\": [ 41788500256, 41778788747, 41775612498 ],\n \"samples_ts\": [ 12.2522, 12.255, 12.256 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T03:16:59Z", + "avg_ns": 22424362383, + "stddev_ns": 179502, + "avg_ts": 22.83231, + "stddev_ts": 0.000101, + "samples_ns": [ + 22424446784, + 22424386923, + 22424253443 + ], + "samples_ts": [ + 22.8322, + 22.8323, + 22.8324 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T03:18:29Z", + "avg_ns": 41780967167, + "stddev_ns": 6714366, + "avg_ts": 12.254384, + "stddev_ts": 0.001969, + "samples_ns": [ + 41788500256, + 41778788747, + 41775612498 + ], + "samples_ts": [ + 12.2522, + 12.255, + 12.256 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 479 + }, + { + "timestamp_utc": "2025-12-09T03:21:29.136931+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:20:35Z\",\n \"avg_ns\": 5520236930,\n \"stddev_ns\": 377472,\n \"avg_ts\": 23.187411,\n \"stddev_ts\": 0.001586,\n \"samples_ns\": [ 5520547309, 5520346758, 5519816723 ],\n \"samples_ts\": [ 23.1861, 23.1869, 23.1892 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:20:57Z\",\n \"avg_ns\": 10364712474,\n \"stddev_ns\": 2647996,\n \"avg_ts\": 12.349595,\n \"stddev_ts\": 0.003153,\n \"samples_ns\": [ 10366307014, 10366172327, 10361658082 ],\n \"samples_ts\": [ 12.3477, 12.3479, 12.3532 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T03:20:35Z", + "avg_ns": 5520236930, + "stddev_ns": 377472, + "avg_ts": 23.187411, + "stddev_ts": 0.001586, + "samples_ns": [ + 5520547309, + 5520346758, + 5519816723 + ], + "samples_ts": [ + 23.1861, + 23.1869, + 23.1892 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T03:20:57Z", + "avg_ns": 10364712474, + "stddev_ns": 2647996, + "avg_ts": 12.349595, + "stddev_ts": 0.003153, + "samples_ns": [ + 10366307014, + 10366172327, + 10361658082 + ], + "samples_ts": [ + 12.3477, + 12.3479, + 12.3532 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 480 + }, + { + "timestamp_utc": "2025-12-09T03:23:57.683030+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:21:29Z\",\n \"avg_ns\": 5522375691,\n \"stddev_ns\": 306611,\n \"avg_ts\": 23.178430,\n \"stddev_ts\": 0.001249,\n \"samples_ns\": [ 5522709328, 5522138140, 5522279606 ],\n \"samples_ts\": [ 23.177, 23.1794, 23.1788 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:21:52Z\",\n \"avg_ns\": 41810875168,\n \"stddev_ns\": 1769789,\n \"avg_ts\": 12.245618,\n \"stddev_ts\": 0.000518,\n \"samples_ns\": [ 41808831624, 41811886902, 41811906978 ],\n \"samples_ts\": [ 12.2462, 12.2453, 12.2453 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T03:21:29Z", + "avg_ns": 5522375691, + "stddev_ns": 306611, + "avg_ts": 23.17843, + "stddev_ts": 0.001249, + "samples_ns": [ + 5522709328, + 5522138140, + 5522279606 + ], + "samples_ts": [ + 23.177, + 23.1794, + 23.1788 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T03:21:52Z", + "avg_ns": 41810875168, + "stddev_ns": 1769789, + "avg_ts": 12.245618, + "stddev_ts": 0.000518, + "samples_ns": [ + 41808831624, + 41811886902, + 41811906978 + ], + "samples_ts": [ + 12.2462, + 12.2453, + 12.2453 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 481 + }, + { + "timestamp_utc": "2025-12-09T03:25:58.498465+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:23:58Z\",\n \"avg_ns\": 22177329409,\n \"stddev_ns\": 1385148,\n \"avg_ts\": 23.086639,\n \"stddev_ts\": 0.001425,\n \"samples_ns\": [ 22175889053, 22178613778, 22177485398 ],\n \"samples_ts\": [ 23.0881, 23.0853, 23.0865 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:25:27Z\",\n \"avg_ns\": 10362945834,\n \"stddev_ns\": 17669008,\n \"avg_ts\": 12.351724,\n \"stddev_ts\": 0.021040,\n \"samples_ns\": [ 10383252927, 10351086314, 10354498261 ],\n \"samples_ts\": [ 12.3275, 12.3659, 12.3618 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T03:23:58Z", + "avg_ns": 22177329409, + "stddev_ns": 1385148, + "avg_ts": 23.086639, + "stddev_ts": 0.001425, + "samples_ns": [ + 22175889053, + 22178613778, + 22177485398 + ], + "samples_ts": [ + 23.0881, + 23.0853, + 23.0865 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T03:25:27Z", + "avg_ns": 10362945834, + "stddev_ns": 17669008, + "avg_ts": 12.351724, + "stddev_ts": 0.02104, + "samples_ns": [ + 10383252927, + 10351086314, + 10354498261 + ], + "samples_ts": [ + 12.3275, + 12.3659, + 12.3618 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 482 + }, + { + "timestamp_utc": "2025-12-09T03:29:33.454058+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:25:59Z\",\n \"avg_ns\": 22172177856,\n \"stddev_ns\": 1024332,\n \"avg_ts\": 23.092003,\n \"stddev_ts\": 0.001044,\n \"samples_ns\": [ 22172406186, 22173046449, 22171080935 ],\n \"samples_ts\": [ 23.0918, 23.0911, 23.0931 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:27:28Z\",\n \"avg_ns\": 41748489754,\n \"stddev_ns\": 3249020,\n \"avg_ts\": 12.263917,\n \"stddev_ts\": 0.000953,\n \"samples_ns\": [ 41751793810, 41745312341, 41748363112 ],\n \"samples_ts\": [ 12.2629, 12.2649, 12.264 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T03:25:59Z", + "avg_ns": 22172177856, + "stddev_ns": 1024332, + "avg_ts": 23.092003, + "stddev_ts": 0.001044, + "samples_ns": [ + 22172406186, + 22173046449, + 22171080935 + ], + "samples_ts": [ + 23.0918, + 23.0911, + 23.0931 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T03:27:28Z", + "avg_ns": 41748489754, + "stddev_ns": 3249020, + "avg_ts": 12.263917, + "stddev_ts": 0.000953, + "samples_ns": [ + 41751793810, + 41745312341, + 41748363112 + ], + "samples_ts": [ + 12.2629, + 12.2649, + 12.264 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 483 + }, + { + "timestamp_utc": "2025-12-09T03:30:27.645170+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:29:34Z\",\n \"avg_ns\": 5520717156,\n \"stddev_ns\": 385549,\n \"avg_ts\": 23.185394,\n \"stddev_ts\": 0.001619,\n \"samples_ns\": [ 5520331008, 5521102104, 5520718356 ],\n \"samples_ts\": [ 23.187, 23.1838, 23.1854 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:29:56Z\",\n \"avg_ns\": 10362822876,\n \"stddev_ns\": 1077302,\n \"avg_ts\": 12.351847,\n \"stddev_ts\": 0.001284,\n \"samples_ns\": [ 10363940853, 10362736302, 10361791473 ],\n \"samples_ts\": [ 12.3505, 12.3519, 12.3531 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T03:29:34Z", + "avg_ns": 5520717156, + "stddev_ns": 385549, + "avg_ts": 23.185394, + "stddev_ts": 0.001619, + "samples_ns": [ + 5520331008, + 5521102104, + 5520718356 + ], + "samples_ts": [ + 23.187, + 23.1838, + 23.1854 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T03:29:56Z", + "avg_ns": 10362822876, + "stddev_ns": 1077302, + "avg_ts": 12.351847, + "stddev_ts": 0.001284, + "samples_ns": [ + 10363940853, + 10362736302, + 10361791473 + ], + "samples_ts": [ + 12.3505, + 12.3519, + 12.3531 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 484 + }, + { + "timestamp_utc": "2025-12-09T03:32:56.204825+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:30:28Z\",\n \"avg_ns\": 5522956946,\n \"stddev_ns\": 507716,\n \"avg_ts\": 23.175991,\n \"stddev_ts\": 0.002084,\n \"samples_ns\": [ 5522625689, 5522717074, 5523528077 ],\n \"samples_ts\": [ 23.1774, 23.177, 23.1736 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:30:50Z\",\n \"avg_ns\": 41807603157,\n \"stddev_ns\": 2730184,\n \"avg_ts\": 12.246576,\n \"stddev_ts\": 0.000798,\n \"samples_ns\": [ 41810078144, 41808044334, 41804686994 ],\n \"samples_ts\": [ 12.2459, 12.2464, 12.2474 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T03:30:28Z", + "avg_ns": 5522956946, + "stddev_ns": 507716, + "avg_ts": 23.175991, + "stddev_ts": 0.002084, + "samples_ns": [ + 5522625689, + 5522717074, + 5523528077 + ], + "samples_ts": [ + 23.1774, + 23.177, + 23.1736 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T03:30:50Z", + "avg_ns": 41807603157, + "stddev_ns": 2730184, + "avg_ts": 12.246576, + "stddev_ts": 0.000798, + "samples_ns": [ + 41810078144, + 41808044334, + 41804686994 + ], + "samples_ts": [ + 12.2459, + 12.2464, + 12.2474 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 485 + }, + { + "timestamp_utc": "2025-12-09T03:34:57.098705+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:32:57Z\",\n \"avg_ns\": 22207945617,\n \"stddev_ns\": 552473,\n \"avg_ts\": 23.054812,\n \"stddev_ts\": 0.000530,\n \"samples_ns\": [ 22207401880, 22208019818, 22208415155 ],\n \"samples_ts\": [ 23.0554, 23.0547, 23.0543 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:34:25Z\",\n \"avg_ns\": 10348542539,\n \"stddev_ns\": 595288,\n \"avg_ts\": 12.368892,\n \"stddev_ts\": 0.000712,\n \"samples_ns\": [ 10347947689, 10349138266, 10348541662 ],\n \"samples_ts\": [ 12.3696, 12.3682, 12.3689 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T03:32:57Z", + "avg_ns": 22207945617, + "stddev_ns": 552473, + "avg_ts": 23.054812, + "stddev_ts": 0.00053, + "samples_ns": [ + 22207401880, + 22208019818, + 22208415155 + ], + "samples_ts": [ + 23.0554, + 23.0547, + 23.0543 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T03:34:25Z", + "avg_ns": 10348542539, + "stddev_ns": 595288, + "avg_ts": 12.368892, + "stddev_ts": 0.000712, + "samples_ns": [ + 10347947689, + 10349138266, + 10348541662 + ], + "samples_ts": [ + 12.3696, + 12.3682, + 12.3689 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 486 + }, + { + "timestamp_utc": "2025-12-09T03:38:32.248428+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:34:57Z\",\n \"avg_ns\": 22206301340,\n \"stddev_ns\": 507828,\n \"avg_ts\": 23.056519,\n \"stddev_ts\": 0.000504,\n \"samples_ns\": [ 22206698902, 22206444819, 22205760300 ],\n \"samples_ts\": [ 23.0561, 23.0564, 23.0571 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:36:26Z\",\n \"avg_ns\": 41758070666,\n \"stddev_ns\": 2825246,\n \"avg_ts\": 12.261103,\n \"stddev_ts\": 0.000827,\n \"samples_ns\": [ 41761156485, 41757421374, 41755634140 ],\n \"samples_ts\": [ 12.2602, 12.2613, 12.2618 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T03:34:57Z", + "avg_ns": 22206301340, + "stddev_ns": 507828, + "avg_ts": 23.056519, + "stddev_ts": 0.000504, + "samples_ns": [ + 22206698902, + 22206444819, + 22205760300 + ], + "samples_ts": [ + 23.0561, + 23.0564, + 23.0571 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T03:36:26Z", + "avg_ns": 41758070666, + "stddev_ns": 2825246, + "avg_ts": 12.261103, + "stddev_ts": 0.000827, + "samples_ns": [ + 41761156485, + 41757421374, + 41755634140 + ], + "samples_ts": [ + 12.2602, + 12.2613, + 12.2618 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 487 + }, + { + "timestamp_utc": "2025-12-09T03:39:26.509170+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:38:33Z\",\n \"avg_ns\": 5524526455,\n \"stddev_ns\": 527436,\n \"avg_ts\": 23.169407,\n \"stddev_ts\": 0.002168,\n \"samples_ns\": [ 5523937853, 5524906199, 5524735315 ],\n \"samples_ts\": [ 23.1719, 23.1678, 23.1685 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:38:55Z\",\n \"avg_ns\": 10366684664,\n \"stddev_ns\": 2582807,\n \"avg_ts\": 12.347246,\n \"stddev_ts\": 0.003076,\n \"samples_ns\": [ 10369433775, 10366311451, 10364308766 ],\n \"samples_ts\": [ 12.344, 12.3477, 12.3501 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T03:38:33Z", + "avg_ns": 5524526455, + "stddev_ns": 527436, + "avg_ts": 23.169407, + "stddev_ts": 0.002168, + "samples_ns": [ + 5523937853, + 5524906199, + 5524735315 + ], + "samples_ts": [ + 23.1719, + 23.1678, + 23.1685 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T03:38:55Z", + "avg_ns": 10366684664, + "stddev_ns": 2582807, + "avg_ts": 12.347246, + "stddev_ts": 0.003076, + "samples_ns": [ + 10369433775, + 10366311451, + 10364308766 + ], + "samples_ts": [ + 12.344, + 12.3477, + 12.3501 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 488 + }, + { + "timestamp_utc": "2025-12-09T03:41:55.306843+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:39:27Z\",\n \"avg_ns\": 5523735565,\n \"stddev_ns\": 958664,\n \"avg_ts\": 23.172725,\n \"stddev_ts\": 0.003997,\n \"samples_ns\": [ 5524738428, 5522842101, 5523626168 ],\n \"samples_ts\": [ 23.1685, 23.1765, 23.1732 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:39:49Z\",\n \"avg_ns\": 41883236917,\n \"stddev_ns\": 8749411,\n \"avg_ts\": 12.224461,\n \"stddev_ts\": 0.002553,\n \"samples_ns\": [ 41892737402, 41875517516, 41881455834 ],\n \"samples_ts\": [ 12.2217, 12.2267, 12.225 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T03:39:27Z", + "avg_ns": 5523735565, + "stddev_ns": 958664, + "avg_ts": 23.172725, + "stddev_ts": 0.003997, + "samples_ns": [ + 5524738428, + 5522842101, + 5523626168 + ], + "samples_ts": [ + 23.1685, + 23.1765, + 23.1732 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T03:39:49Z", + "avg_ns": 41883236917, + "stddev_ns": 8749411, + "avg_ts": 12.224461, + "stddev_ts": 0.002553, + "samples_ns": [ + 41892737402, + 41875517516, + 41881455834 + ], + "samples_ts": [ + 12.2217, + 12.2267, + 12.225 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 489 + }, + { + "timestamp_utc": "2025-12-09T03:43:57.092430+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:41:56Z\",\n \"avg_ns\": 22423680401,\n \"stddev_ns\": 574431,\n \"avg_ts\": 22.833005,\n \"stddev_ts\": 0.000544,\n \"samples_ns\": [ 22423470327, 22423283417, 22424287461 ],\n \"samples_ts\": [ 22.8332, 22.8334, 22.8324 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:43:25Z\",\n \"avg_ns\": 10348129248,\n \"stddev_ns\": 3739893,\n \"avg_ts\": 12.369387,\n \"stddev_ts\": 0.004467,\n \"samples_ns\": [ 10351659764, 10348512875, 10344215107 ],\n \"samples_ts\": [ 12.3652, 12.3689, 12.3741 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T03:41:56Z", + "avg_ns": 22423680401, + "stddev_ns": 574431, + "avg_ts": 22.833005, + "stddev_ts": 0.000544, + "samples_ns": [ + 22423470327, + 22423283417, + 22424287461 + ], + "samples_ts": [ + 22.8332, + 22.8334, + 22.8324 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T03:43:25Z", + "avg_ns": 10348129248, + "stddev_ns": 3739893, + "avg_ts": 12.369387, + "stddev_ts": 0.004467, + "samples_ns": [ + 10351659764, + 10348512875, + 10344215107 + ], + "samples_ts": [ + 12.3652, + 12.3689, + 12.3741 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 490 + }, + { + "timestamp_utc": "2025-12-09T03:47:33.087963+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:43:57Z\",\n \"avg_ns\": 22417443181,\n \"stddev_ns\": 344762,\n \"avg_ts\": 22.839358,\n \"stddev_ts\": 0.000316,\n \"samples_ns\": [ 22417176803, 22417784278, 22417368463 ],\n \"samples_ts\": [ 22.8396, 22.839, 22.8394 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:45:27Z\",\n \"avg_ns\": 41766507746,\n \"stddev_ns\": 3512809,\n \"avg_ts\": 12.258626,\n \"stddev_ts\": 0.001031,\n \"samples_ns\": [ 41769939613, 41762919234, 41766664391 ],\n \"samples_ts\": [ 12.2576, 12.2597, 12.2586 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T03:43:57Z", + "avg_ns": 22417443181, + "stddev_ns": 344762, + "avg_ts": 22.839358, + "stddev_ts": 0.000316, + "samples_ns": [ + 22417176803, + 22417784278, + 22417368463 + ], + "samples_ts": [ + 22.8396, + 22.839, + 22.8394 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T03:45:27Z", + "avg_ns": 41766507746, + "stddev_ns": 3512809, + "avg_ts": 12.258626, + "stddev_ts": 0.001031, + "samples_ns": [ + 41769939613, + 41762919234, + 41766664391 + ], + "samples_ts": [ + 12.2576, + 12.2597, + 12.2586 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 491 + }, + { + "timestamp_utc": "2025-12-09T03:48:27.309793+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:47:33Z\",\n \"avg_ns\": 5521472037,\n \"stddev_ns\": 1702944,\n \"avg_ts\": 23.182225,\n \"stddev_ts\": 0.007149,\n \"samples_ns\": [ 5520428425, 5523437164, 5520550522 ],\n \"samples_ts\": [ 23.1866, 23.174, 23.1861 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:47:56Z\",\n \"avg_ns\": 10364957196,\n \"stddev_ns\": 2737331,\n \"avg_ts\": 12.349304,\n \"stddev_ts\": 0.003261,\n \"samples_ns\": [ 10367990294, 10364210856, 10362670438 ],\n \"samples_ts\": [ 12.3457, 12.3502, 12.352 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T03:47:33Z", + "avg_ns": 5521472037, + "stddev_ns": 1702944, + "avg_ts": 23.182225, + "stddev_ts": 0.007149, + "samples_ns": [ + 5520428425, + 5523437164, + 5520550522 + ], + "samples_ts": [ + 23.1866, + 23.174, + 23.1861 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T03:47:56Z", + "avg_ns": 10364957196, + "stddev_ns": 2737331, + "avg_ts": 12.349304, + "stddev_ts": 0.003261, + "samples_ns": [ + 10367990294, + 10364210856, + 10362670438 + ], + "samples_ts": [ + 12.3457, + 12.3502, + 12.352 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 492 + }, + { + "timestamp_utc": "2025-12-09T03:50:56.839807+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:48:28Z\",\n \"avg_ns\": 5519520954,\n \"stddev_ns\": 386598,\n \"avg_ts\": 23.190418,\n \"stddev_ts\": 0.001594,\n \"samples_ns\": [ 5519757148, 5519083329, 5519722386 ],\n \"samples_ts\": [ 23.1894, 23.1923, 23.1896 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:48:50Z\",\n \"avg_ns\": 42142587574,\n \"stddev_ns\": 3430085,\n \"avg_ts\": 12.149230,\n \"stddev_ts\": 0.000989,\n \"samples_ns\": [ 42146515065, 42140180439, 42141067218 ],\n \"samples_ts\": [ 12.1481, 12.1499, 12.1497 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T03:48:28Z", + "avg_ns": 5519520954, + "stddev_ns": 386598, + "avg_ts": 23.190418, + "stddev_ts": 0.001594, + "samples_ns": [ + 5519757148, + 5519083329, + 5519722386 + ], + "samples_ts": [ + 23.1894, + 23.1923, + 23.1896 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T03:48:50Z", + "avg_ns": 42142587574, + "stddev_ns": 3430085, + "avg_ts": 12.14923, + "stddev_ts": 0.000989, + "samples_ns": [ + 42146515065, + 42140180439, + 42141067218 + ], + "samples_ts": [ + 12.1481, + 12.1499, + 12.1497 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 493 + }, + { + "timestamp_utc": "2025-12-09T03:52:57.613436+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:50:57Z\",\n \"avg_ns\": 22174941033,\n \"stddev_ns\": 1645821,\n \"avg_ts\": 23.089126,\n \"stddev_ts\": 0.001707,\n \"samples_ns\": [ 22176247377, 22175473886, 22173101837 ],\n \"samples_ts\": [ 23.0878, 23.0886, 23.091 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:52:26Z\",\n \"avg_ns\": 10350875403,\n \"stddev_ns\": 863158,\n \"avg_ts\": 12.366104,\n \"stddev_ts\": 0.001024,\n \"samples_ns\": [ 10351123485, 10351581142, 10349921583 ],\n \"samples_ts\": [ 12.3658, 12.3653, 12.3672 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T03:50:57Z", + "avg_ns": 22174941033, + "stddev_ns": 1645821, + "avg_ts": 23.089126, + "stddev_ts": 0.001707, + "samples_ns": [ + 22176247377, + 22175473886, + 22173101837 + ], + "samples_ts": [ + 23.0878, + 23.0886, + 23.091 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T03:52:26Z", + "avg_ns": 10350875403, + "stddev_ns": 863158, + "avg_ts": 12.366104, + "stddev_ts": 0.001024, + "samples_ns": [ + 10351123485, + 10351581142, + 10349921583 + ], + "samples_ts": [ + 12.3658, + 12.3653, + 12.3672 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 494 + }, + { + "timestamp_utc": "2025-12-09T03:56:32.614126+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:52:58Z\",\n \"avg_ns\": 22181626799,\n \"stddev_ns\": 646116,\n \"avg_ts\": 23.082166,\n \"stddev_ts\": 0.000636,\n \"samples_ns\": [ 22182006713, 22181951481, 22180922205 ],\n \"samples_ts\": [ 23.0818, 23.0818, 23.0829 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:54:27Z\",\n \"avg_ns\": 41750021767,\n \"stddev_ns\": 3593827,\n \"avg_ts\": 12.263467,\n \"stddev_ts\": 0.001052,\n \"samples_ns\": [ 41750383873, 41753409154, 41746272276 ],\n \"samples_ts\": [ 12.2634, 12.2625, 12.2646 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T03:52:58Z", + "avg_ns": 22181626799, + "stddev_ns": 646116, + "avg_ts": 23.082166, + "stddev_ts": 0.000636, + "samples_ns": [ + 22182006713, + 22181951481, + 22180922205 + ], + "samples_ts": [ + 23.0818, + 23.0818, + 23.0829 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T03:54:27Z", + "avg_ns": 41750021767, + "stddev_ns": 3593827, + "avg_ts": 12.263467, + "stddev_ts": 0.001052, + "samples_ns": [ + 41750383873, + 41753409154, + 41746272276 + ], + "samples_ts": [ + 12.2634, + 12.2625, + 12.2646 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 495 + }, + { + "timestamp_utc": "2025-12-09T03:57:26.880537+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:56:33Z\",\n \"avg_ns\": 5522015874,\n \"stddev_ns\": 361920,\n \"avg_ts\": 23.179941,\n \"stddev_ts\": 0.001519,\n \"samples_ns\": [ 5521695198, 5522408293, 5521944131 ],\n \"samples_ts\": [ 23.1813, 23.1783, 23.1802 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:56:55Z\",\n \"avg_ns\": 10369326786,\n \"stddev_ns\": 4813587,\n \"avg_ts\": 12.344101,\n \"stddev_ts\": 0.005730,\n \"samples_ns\": [ 10374330357, 10368921148, 10364728853 ],\n \"samples_ts\": [ 12.3381, 12.3446, 12.3496 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T03:56:33Z", + "avg_ns": 5522015874, + "stddev_ns": 361920, + "avg_ts": 23.179941, + "stddev_ts": 0.001519, + "samples_ns": [ + 5521695198, + 5522408293, + 5521944131 + ], + "samples_ts": [ + 23.1813, + 23.1783, + 23.1802 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T03:56:55Z", + "avg_ns": 10369326786, + "stddev_ns": 4813587, + "avg_ts": 12.344101, + "stddev_ts": 0.00573, + "samples_ns": [ + 10374330357, + 10368921148, + 10364728853 + ], + "samples_ts": [ + 12.3381, + 12.3446, + 12.3496 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 496 + }, + { + "timestamp_utc": "2025-12-09T03:59:55.418636+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:57:27Z\",\n \"avg_ns\": 5520768585,\n \"stddev_ns\": 1561272,\n \"avg_ts\": 23.185179,\n \"stddev_ts\": 0.006549,\n \"samples_ns\": [ 5522520719, 5519532558, 5520252479 ],\n \"samples_ts\": [ 23.1778, 23.1904, 23.1873 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:57:49Z\",\n \"avg_ns\": 41794759443,\n \"stddev_ns\": 2901591,\n \"avg_ts\": 12.250340,\n \"stddev_ts\": 0.000850,\n \"samples_ns\": [ 41797311718, 41795363107, 41791603504 ],\n \"samples_ts\": [ 12.2496, 12.2502, 12.2513 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T03:57:27Z", + "avg_ns": 5520768585, + "stddev_ns": 1561272, + "avg_ts": 23.185179, + "stddev_ts": 0.006549, + "samples_ns": [ + 5522520719, + 5519532558, + 5520252479 + ], + "samples_ts": [ + 23.1778, + 23.1904, + 23.1873 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T03:57:49Z", + "avg_ns": 41794759443, + "stddev_ns": 2901591, + "avg_ts": 12.25034, + "stddev_ts": 0.00085, + "samples_ns": [ + 41797311718, + 41795363107, + 41791603504 + ], + "samples_ts": [ + 12.2496, + 12.2502, + 12.2513 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 497 + }, + { + "timestamp_utc": "2025-12-09T04:01:56.291937+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T03:59:56Z\",\n \"avg_ns\": 22198062704,\n \"stddev_ns\": 1416177,\n \"avg_ts\": 23.065076,\n \"stddev_ts\": 0.001455,\n \"samples_ns\": [ 22197396490, 22199671853, 22197119771 ],\n \"samples_ts\": [ 23.0658, 23.0634, 23.0661 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:01:25Z\",\n \"avg_ns\": 10353711922,\n \"stddev_ns\": 2650622,\n \"avg_ts\": 12.362717,\n \"stddev_ts\": 0.003160,\n \"samples_ns\": [ 10353364846, 10356515051, 10351255871 ],\n \"samples_ts\": [ 12.3631, 12.3594, 12.3656 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T03:59:56Z", + "avg_ns": 22198062704, + "stddev_ns": 1416177, + "avg_ts": 23.065076, + "stddev_ts": 0.001455, + "samples_ns": [ + 22197396490, + 22199671853, + 22197119771 + ], + "samples_ts": [ + 23.0658, + 23.0634, + 23.0661 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T04:01:25Z", + "avg_ns": 10353711922, + "stddev_ns": 2650622, + "avg_ts": 12.362717, + "stddev_ts": 0.00316, + "samples_ns": [ + 10353364846, + 10356515051, + 10351255871 + ], + "samples_ts": [ + 12.3631, + 12.3594, + 12.3656 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 498 + }, + { + "timestamp_utc": "2025-12-09T04:05:31.313675+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:01:57Z\",\n \"avg_ns\": 22199455885,\n \"stddev_ns\": 1802413,\n \"avg_ts\": 23.063628,\n \"stddev_ts\": 0.001866,\n \"samples_ns\": [ 22201416854, 22199060589, 22197890213 ],\n \"samples_ts\": [ 23.0616, 23.064, 23.0653 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:03:25Z\",\n \"avg_ns\": 41725694287,\n \"stddev_ns\": 7038669,\n \"avg_ts\": 12.270617,\n \"stddev_ts\": 0.002070,\n \"samples_ns\": [ 41733468459, 41719754261, 41723860141 ],\n \"samples_ts\": [ 12.2683, 12.2724, 12.2712 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T04:01:57Z", + "avg_ns": 22199455885, + "stddev_ns": 1802413, + "avg_ts": 23.063628, + "stddev_ts": 0.001866, + "samples_ns": [ + 22201416854, + 22199060589, + 22197890213 + ], + "samples_ts": [ + 23.0616, + 23.064, + 23.0653 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T04:03:25Z", + "avg_ns": 41725694287, + "stddev_ns": 7038669, + "avg_ts": 12.270617, + "stddev_ts": 0.00207, + "samples_ns": [ + 41733468459, + 41719754261, + 41723860141 + ], + "samples_ts": [ + 12.2683, + 12.2724, + 12.2712 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 499 + }, + { + "timestamp_utc": "2025-12-09T04:06:25.476884+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:05:32Z\",\n \"avg_ns\": 5520340246,\n \"stddev_ns\": 204803,\n \"avg_ts\": 23.186977,\n \"stddev_ts\": 0.000802,\n \"samples_ns\": [ 5520275331, 5520555087, 5520190321 ],\n \"samples_ts\": [ 23.1872, 23.1861, 23.1876 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:05:54Z\",\n \"avg_ns\": 10354673038,\n \"stddev_ns\": 2544305,\n \"avg_ts\": 12.361569,\n \"stddev_ts\": 0.003035,\n \"samples_ns\": [ 10357307181, 10354478066, 10352233868 ],\n \"samples_ts\": [ 12.3584, 12.3618, 12.3645 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T04:05:32Z", + "avg_ns": 5520340246, + "stddev_ns": 204803, + "avg_ts": 23.186977, + "stddev_ts": 0.000802, + "samples_ns": [ + 5520275331, + 5520555087, + 5520190321 + ], + "samples_ts": [ + 23.1872, + 23.1861, + 23.1876 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T04:05:54Z", + "avg_ns": 10354673038, + "stddev_ns": 2544305, + "avg_ts": 12.361569, + "stddev_ts": 0.003035, + "samples_ns": [ + 10357307181, + 10354478066, + 10352233868 + ], + "samples_ts": [ + 12.3584, + 12.3618, + 12.3645 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 500 + }, + { + "timestamp_utc": "2025-12-09T04:08:54.328463+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:06:26Z\",\n \"avg_ns\": 5521951852,\n \"stddev_ns\": 152167,\n \"avg_ts\": 23.180209,\n \"stddev_ts\": 0.000462,\n \"samples_ns\": [ 5522066699, 5521941532, 5521847327 ],\n \"samples_ts\": [ 23.1797, 23.1803, 23.1806 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:06:48Z\",\n \"avg_ns\": 41913618264,\n \"stddev_ns\": 6793031,\n \"avg_ts\": 12.215600,\n \"stddev_ts\": 0.001979,\n \"samples_ns\": [ 41919537160, 41915111722, 41906205911 ],\n \"samples_ts\": [ 12.2139, 12.2152, 12.2178 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T04:06:26Z", + "avg_ns": 5521951852, + "stddev_ns": 152167, + "avg_ts": 23.180209, + "stddev_ts": 0.000462, + "samples_ns": [ + 5522066699, + 5521941532, + 5521847327 + ], + "samples_ts": [ + 23.1797, + 23.1803, + 23.1806 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T04:06:48Z", + "avg_ns": 41913618264, + "stddev_ns": 6793031, + "avg_ts": 12.2156, + "stddev_ts": 0.001979, + "samples_ns": [ + 41919537160, + 41915111722, + 41906205911 + ], + "samples_ts": [ + 12.2139, + 12.2152, + 12.2178 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 501 + }, + { + "timestamp_utc": "2025-12-09T04:10:56.168690+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:08:55Z\",\n \"avg_ns\": 22421281326,\n \"stddev_ns\": 1764722,\n \"avg_ts\": 22.835448,\n \"stddev_ts\": 0.001784,\n \"samples_ns\": [ 22422039051, 22422526901, 22419278028 ],\n \"samples_ts\": [ 22.8347, 22.8342, 22.8375 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:10:24Z\",\n \"avg_ns\": 10362896526,\n \"stddev_ns\": 2506586,\n \"avg_ts\": 12.351759,\n \"stddev_ts\": 0.002988,\n \"samples_ns\": [ 10365208416, 10360232486, 10363248676 ],\n \"samples_ts\": [ 12.349, 12.3549, 12.3513 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T04:08:55Z", + "avg_ns": 22421281326, + "stddev_ns": 1764722, + "avg_ts": 22.835448, + "stddev_ts": 0.001784, + "samples_ns": [ + 22422039051, + 22422526901, + 22419278028 + ], + "samples_ts": [ + 22.8347, + 22.8342, + 22.8375 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T04:10:24Z", + "avg_ns": 10362896526, + "stddev_ns": 2506586, + "avg_ts": 12.351759, + "stddev_ts": 0.002988, + "samples_ns": [ + 10365208416, + 10360232486, + 10363248676 + ], + "samples_ts": [ + 12.349, + 12.3549, + 12.3513 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 502 + }, + { + "timestamp_utc": "2025-12-09T04:14:32.408308+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:10:57Z\",\n \"avg_ns\": 22417946647,\n \"stddev_ns\": 2196739,\n \"avg_ts\": 22.838845,\n \"stddev_ts\": 0.002228,\n \"samples_ns\": [ 22415421884, 22419211304, 22419206755 ],\n \"samples_ts\": [ 22.8414, 22.8376, 22.8376 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:12:26Z\",\n \"avg_ns\": 41833858764,\n \"stddev_ns\": 3478326,\n \"avg_ts\": 12.238890,\n \"stddev_ts\": 0.001018,\n \"samples_ns\": [ 41831728780, 41831974836, 41837872676 ],\n \"samples_ts\": [ 12.2395, 12.2394, 12.2377 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T04:10:57Z", + "avg_ns": 22417946647, + "stddev_ns": 2196739, + "avg_ts": 22.838845, + "stddev_ts": 0.002228, + "samples_ns": [ + 22415421884, + 22419211304, + 22419206755 + ], + "samples_ts": [ + 22.8414, + 22.8376, + 22.8376 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T04:12:26Z", + "avg_ns": 41833858764, + "stddev_ns": 3478326, + "avg_ts": 12.23889, + "stddev_ts": 0.001018, + "samples_ns": [ + 41831728780, + 41831974836, + 41837872676 + ], + "samples_ts": [ + 12.2395, + 12.2394, + 12.2377 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 503 + }, + { + "timestamp_utc": "2025-12-09T04:15:19.663670+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:14:33Z\",\n \"avg_ns\": 3765308803,\n \"stddev_ns\": 123899,\n \"avg_ts\": 33.994556,\n \"stddev_ts\": 0.000972,\n \"samples_ns\": [ 3765328668, 3765405125, 3765192617 ],\n \"samples_ts\": [ 33.9944, 33.9937, 33.9956 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:14:48Z\",\n \"avg_ns\": 10368972276,\n \"stddev_ns\": 2796448,\n \"avg_ts\": 12.344522,\n \"stddev_ts\": 0.003327,\n \"samples_ns\": [ 10365789535, 10370102806, 10371024488 ],\n \"samples_ts\": [ 12.3483, 12.3432, 12.3421 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T04:14:33Z", + "avg_ns": 3765308803, + "stddev_ns": 123899, + "avg_ts": 33.994556, + "stddev_ts": 0.000972, + "samples_ns": [ + 3765328668, + 3765405125, + 3765192617 + ], + "samples_ts": [ + 33.9944, + 33.9937, + 33.9956 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T04:14:48Z", + "avg_ns": 10368972276, + "stddev_ns": 2796448, + "avg_ts": 12.344522, + "stddev_ts": 0.003327, + "samples_ns": [ + 10365789535, + 10370102806, + 10371024488 + ], + "samples_ts": [ + 12.3483, + 12.3432, + 12.3421 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 504 + }, + { + "timestamp_utc": "2025-12-09T04:17:41.322443+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:15:20Z\",\n \"avg_ns\": 3760265796,\n \"stddev_ns\": 488042,\n \"avg_ts\": 34.040147,\n \"stddev_ts\": 0.004383,\n \"samples_ns\": [ 3760507465, 3759708359, 3760581565 ],\n \"samples_ts\": [ 34.038, 34.0452, 34.0373 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:15:35Z\",\n \"avg_ns\": 41845519409,\n \"stddev_ns\": 303654607,\n \"avg_ts\": 12.235907,\n \"stddev_ts\": 0.088421,\n \"samples_ns\": [ 41663026298, 41677481884, 42196050047 ],\n \"samples_ts\": [ 12.2891, 12.2848, 12.1338 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T04:15:20Z", + "avg_ns": 3760265796, + "stddev_ns": 488042, + "avg_ts": 34.040147, + "stddev_ts": 0.004383, + "samples_ns": [ + 3760507465, + 3759708359, + 3760581565 + ], + "samples_ts": [ + 34.038, + 34.0452, + 34.0373 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T04:15:35Z", + "avg_ns": 41845519409, + "stddev_ns": 303654607, + "avg_ts": 12.235907, + "stddev_ts": 0.088421, + "samples_ns": [ + 41663026298, + 41677481884, + 42196050047 + ], + "samples_ts": [ + 12.2891, + 12.2848, + 12.1338 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 505 + }, + { + "timestamp_utc": "2025-12-09T04:19:14.805755+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:17:42Z\",\n \"avg_ns\": 15085239449,\n \"stddev_ns\": 1721727,\n \"avg_ts\": 33.940463,\n \"stddev_ts\": 0.003874,\n \"samples_ns\": [ 15086184826, 15083252151, 15086281370 ],\n \"samples_ts\": [ 33.9383, 33.9449, 33.9381 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:18:42Z\",\n \"avg_ns\": 10693377173,\n \"stddev_ns\": 103628470,\n \"avg_ts\": 11.970779,\n \"stddev_ts\": 0.116659,\n \"samples_ns\": [ 10573743643, 10755364902, 10751022975 ],\n \"samples_ts\": [ 12.1055, 11.901, 11.9058 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T04:17:42Z", + "avg_ns": 15085239449, + "stddev_ns": 1721727, + "avg_ts": 33.940463, + "stddev_ts": 0.003874, + "samples_ns": [ + 15086184826, + 15083252151, + 15086281370 + ], + "samples_ts": [ + 33.9383, + 33.9449, + 33.9381 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T04:18:42Z", + "avg_ns": 10693377173, + "stddev_ns": 103628470, + "avg_ts": 11.970779, + "stddev_ts": 0.116659, + "samples_ns": [ + 10573743643, + 10755364902, + 10751022975 + ], + "samples_ts": [ + 12.1055, + 11.901, + 11.9058 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 506 + }, + { + "timestamp_utc": "2025-12-09T04:22:26.081121+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:19:15Z\",\n \"avg_ns\": 15084119117,\n \"stddev_ns\": 739079,\n \"avg_ts\": 33.942983,\n \"stddev_ts\": 0.001663,\n \"samples_ns\": [ 15083797667, 15084964489, 15083595195 ],\n \"samples_ts\": [ 33.9437, 33.9411, 33.9442 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:20:16Z\",\n \"avg_ns\": 43288326892,\n \"stddev_ns\": 68201999,\n \"avg_ts\": 11.827689,\n \"stddev_ts\": 0.018652,\n \"samples_ns\": [ 43209619249, 43329982790, 43325378638 ],\n \"samples_ts\": [ 11.8492, 11.8163, 11.8176 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T04:19:15Z", + "avg_ns": 15084119117, + "stddev_ns": 739079, + "avg_ts": 33.942983, + "stddev_ts": 0.001663, + "samples_ns": [ + 15083797667, + 15084964489, + 15083595195 + ], + "samples_ts": [ + 33.9437, + 33.9411, + 33.9442 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T04:20:16Z", + "avg_ns": 43288326892, + "stddev_ns": 68201999, + "avg_ts": 11.827689, + "stddev_ts": 0.018652, + "samples_ns": [ + 43209619249, + 43329982790, + 43325378638 + ], + "samples_ts": [ + 11.8492, + 11.8163, + 11.8176 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 507 + }, + { + "timestamp_utc": "2025-12-09T04:23:14.251942+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:22:27Z\",\n \"avg_ns\": 3759049315,\n \"stddev_ns\": 725736,\n \"avg_ts\": 34.051164,\n \"stddev_ts\": 0.006574,\n \"samples_ns\": [ 3759851540, 3758858011, 3758438394 ],\n \"samples_ts\": [ 34.0439, 34.0529, 34.0567 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:22:42Z\",\n \"avg_ns\": 10665889805,\n \"stddev_ns\": 141450076,\n \"avg_ts\": 12.002291,\n \"stddev_ts\": 0.160270,\n \"samples_ns\": [ 10504562606, 10768650354, 10724456455 ],\n \"samples_ts\": [ 12.1852, 11.8864, 11.9353 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T04:22:27Z", + "avg_ns": 3759049315, + "stddev_ns": 725736, + "avg_ts": 34.051164, + "stddev_ts": 0.006574, + "samples_ns": [ + 3759851540, + 3758858011, + 3758438394 + ], + "samples_ts": [ + 34.0439, + 34.0529, + 34.0567 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T04:22:42Z", + "avg_ns": 10665889805, + "stddev_ns": 141450076, + "avg_ts": 12.002291, + "stddev_ts": 0.16027, + "samples_ns": [ + 10504562606, + 10768650354, + 10724456455 + ], + "samples_ts": [ + 12.1852, + 11.8864, + 11.9353 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 508 + }, + { + "timestamp_utc": "2025-12-09T04:25:40.076120+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:23:15Z\",\n \"avg_ns\": 3759167020,\n \"stddev_ns\": 1471057,\n \"avg_ts\": 34.050100,\n \"stddev_ts\": 0.013299,\n \"samples_ns\": [ 3758127175, 3758526972, 3760846915 ],\n \"samples_ts\": [ 34.0595, 34.0559, 34.0349 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:23:30Z\",\n \"avg_ns\": 43241747515,\n \"stddev_ns\": 193787240,\n \"avg_ts\": 11.840568,\n \"stddev_ts\": 0.053201,\n \"samples_ns\": [ 43017981794, 43353273091, 43353987661 ],\n \"samples_ts\": [ 11.902, 11.81, 11.8098 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T04:23:15Z", + "avg_ns": 3759167020, + "stddev_ns": 1471057, + "avg_ts": 34.0501, + "stddev_ts": 0.013299, + "samples_ns": [ + 3758127175, + 3758526972, + 3760846915 + ], + "samples_ts": [ + 34.0595, + 34.0559, + 34.0349 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T04:23:30Z", + "avg_ns": 43241747515, + "stddev_ns": 193787240, + "avg_ts": 11.840568, + "stddev_ts": 0.053201, + "samples_ns": [ + 43017981794, + 43353273091, + 43353987661 + ], + "samples_ts": [ + 11.902, + 11.81, + 11.8098 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 509 + }, + { + "timestamp_utc": "2025-12-09T04:27:13.607912+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:25:40Z\",\n \"avg_ns\": 15099094187,\n \"stddev_ns\": 2997384,\n \"avg_ts\": 33.909320,\n \"stddev_ts\": 0.006727,\n \"samples_ns\": [ 15100711804, 15095638361, 15100932397 ],\n \"samples_ts\": [ 33.9057, 33.9171, 33.9052 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:26:41Z\",\n \"avg_ns\": 10687516619,\n \"stddev_ns\": 90657097,\n \"avg_ts\": 11.977167,\n \"stddev_ts\": 0.102087,\n \"samples_ns\": [ 10583055803, 10745633625, 10733860430 ],\n \"samples_ts\": [ 12.0948, 11.9118, 11.9249 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T04:25:40Z", + "avg_ns": 15099094187, + "stddev_ns": 2997384, + "avg_ts": 33.90932, + "stddev_ts": 0.006727, + "samples_ns": [ + 15100711804, + 15095638361, + 15100932397 + ], + "samples_ts": [ + 33.9057, + 33.9171, + 33.9052 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T04:26:41Z", + "avg_ns": 10687516619, + "stddev_ns": 90657097, + "avg_ts": 11.977167, + "stddev_ts": 0.102087, + "samples_ns": [ + 10583055803, + 10745633625, + 10733860430 + ], + "samples_ts": [ + 12.0948, + 11.9118, + 11.9249 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 510 + }, + { + "timestamp_utc": "2025-12-09T04:30:24.425019+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:27:14Z\",\n \"avg_ns\": 15091867820,\n \"stddev_ns\": 4055514,\n \"avg_ts\": 33.925557,\n \"stddev_ts\": 0.009111,\n \"samples_ns\": [ 15096548563, 15089517683, 15089537215 ],\n \"samples_ts\": [ 33.915, 33.9308, 33.9308 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:28:14Z\",\n \"avg_ns\": 43125554906,\n \"stddev_ns\": 82263932,\n \"avg_ts\": 11.872340,\n \"stddev_ts\": 0.022672,\n \"samples_ns\": [ 43030601914, 43170738280, 43175324525 ],\n \"samples_ts\": [ 11.8985, 11.8599, 11.8586 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T04:27:14Z", + "avg_ns": 15091867820, + "stddev_ns": 4055514, + "avg_ts": 33.925557, + "stddev_ts": 0.009111, + "samples_ns": [ + 15096548563, + 15089517683, + 15089537215 + ], + "samples_ts": [ + 33.915, + 33.9308, + 33.9308 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T04:28:14Z", + "avg_ns": 43125554906, + "stddev_ns": 82263932, + "avg_ts": 11.87234, + "stddev_ts": 0.022672, + "samples_ns": [ + 43030601914, + 43170738280, + 43175324525 + ], + "samples_ts": [ + 11.8985, + 11.8599, + 11.8586 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 511 + }, + { + "timestamp_utc": "2025-12-09T04:31:12.187492+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:30:25Z\",\n \"avg_ns\": 3763017598,\n \"stddev_ns\": 724804,\n \"avg_ts\": 34.015255,\n \"stddev_ts\": 0.006528,\n \"samples_ns\": [ 3762399586, 3762841708, 3763811501 ],\n \"samples_ts\": [ 34.0208, 34.0168, 34.0081 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:30:40Z\",\n \"avg_ns\": 10533892964,\n \"stddev_ns\": 151110240,\n \"avg_ts\": 12.152933,\n \"stddev_ts\": 0.175690,\n \"samples_ns\": [ 10360797969, 10639490032, 10601390892 ],\n \"samples_ts\": [ 12.3543, 12.0307, 12.0739 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T04:30:25Z", + "avg_ns": 3763017598, + "stddev_ns": 724804, + "avg_ts": 34.015255, + "stddev_ts": 0.006528, + "samples_ns": [ + 3762399586, + 3762841708, + 3763811501 + ], + "samples_ts": [ + 34.0208, + 34.0168, + 34.0081 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T04:30:40Z", + "avg_ns": 10533892964, + "stddev_ns": 151110240, + "avg_ts": 12.152933, + "stddev_ts": 0.17569, + "samples_ns": [ + 10360797969, + 10639490032, + 10601390892 + ], + "samples_ts": [ + 12.3543, + 12.0307, + 12.0739 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 512 + }, + { + "timestamp_utc": "2025-12-09T04:33:36.631573+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:31:13Z\",\n \"avg_ns\": 3762216282,\n \"stddev_ns\": 554987,\n \"avg_ts\": 34.022500,\n \"stddev_ts\": 0.004988,\n \"samples_ns\": [ 3761643993, 3762744528, 3762260326 ],\n \"samples_ts\": [ 34.0277, 34.0177, 34.0221 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:31:28Z\",\n \"avg_ns\": 42774431714,\n \"stddev_ns\": 216622886,\n \"avg_ts\": 11.969972,\n \"stddev_ts\": 0.060446,\n \"samples_ns\": [ 42636784940, 43024129246, 42662380957 ],\n \"samples_ts\": [ 12.0084, 11.9003, 12.0012 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T04:31:13Z", + "avg_ns": 3762216282, + "stddev_ns": 554987, + "avg_ts": 34.0225, + "stddev_ts": 0.004988, + "samples_ns": [ + 3761643993, + 3762744528, + 3762260326 + ], + "samples_ts": [ + 34.0277, + 34.0177, + 34.0221 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T04:31:28Z", + "avg_ns": 42774431714, + "stddev_ns": 216622886, + "avg_ts": 11.969972, + "stddev_ts": 0.060446, + "samples_ns": [ + 42636784940, + 43024129246, + 42662380957 + ], + "samples_ts": [ + 12.0084, + 11.9003, + 12.0012 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 513 + }, + { + "timestamp_utc": "2025-12-09T04:35:10.813259+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:33:37Z\",\n \"avg_ns\": 15290229982,\n \"stddev_ns\": 2901894,\n \"avg_ts\": 33.485436,\n \"stddev_ts\": 0.006350,\n \"samples_ns\": [ 15292800905, 15287087515, 15290801527 ],\n \"samples_ts\": [ 33.4798, 33.4923, 33.4842 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:34:38Z\",\n \"avg_ns\": 10650387508,\n \"stddev_ns\": 171959139,\n \"avg_ts\": 12.020449,\n \"stddev_ts\": 0.195775,\n \"samples_ns\": [ 10453462445, 10770880121, 10726819959 ],\n \"samples_ts\": [ 12.2447, 11.8839, 11.9327 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T04:33:37Z", + "avg_ns": 15290229982, + "stddev_ns": 2901894, + "avg_ts": 33.485436, + "stddev_ts": 0.00635, + "samples_ns": [ + 15292800905, + 15287087515, + 15290801527 + ], + "samples_ts": [ + 33.4798, + 33.4923, + 33.4842 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T04:34:38Z", + "avg_ns": 10650387508, + "stddev_ns": 171959139, + "avg_ts": 12.020449, + "stddev_ts": 0.195775, + "samples_ns": [ + 10453462445, + 10770880121, + 10726819959 + ], + "samples_ts": [ + 12.2447, + 11.8839, + 11.9327 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 514 + }, + { + "timestamp_utc": "2025-12-09T04:38:22.260746+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:35:11Z\",\n \"avg_ns\": 15280901507,\n \"stddev_ns\": 3133968,\n \"avg_ts\": 33.505878,\n \"stddev_ts\": 0.006872,\n \"samples_ns\": [ 15282247714, 15277319357, 15283137450 ],\n \"samples_ts\": [ 33.5029, 33.5137, 33.501 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:36:12Z\",\n \"avg_ns\": 43084634219,\n \"stddev_ns\": 80304656,\n \"avg_ts\": 11.883615,\n \"stddev_ts\": 0.022143,\n \"samples_ns\": [ 43076259600, 43168797738, 43008845320 ],\n \"samples_ts\": [ 11.8859, 11.8604, 11.9045 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T04:35:11Z", + "avg_ns": 15280901507, + "stddev_ns": 3133968, + "avg_ts": 33.505878, + "stddev_ts": 0.006872, + "samples_ns": [ + 15282247714, + 15277319357, + 15283137450 + ], + "samples_ts": [ + 33.5029, + 33.5137, + 33.501 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T04:36:12Z", + "avg_ns": 43084634219, + "stddev_ns": 80304656, + "avg_ts": 11.883615, + "stddev_ts": 0.022143, + "samples_ns": [ + 43076259600, + 43168797738, + 43008845320 + ], + "samples_ts": [ + 11.8859, + 11.8604, + 11.9045 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 515 + }, + { + "timestamp_utc": "2025-12-09T04:39:09.969951+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:38:23Z\",\n \"avg_ns\": 3764829171,\n \"stddev_ns\": 2506164,\n \"avg_ts\": 33.998897,\n \"stddev_ts\": 0.022610,\n \"samples_ns\": [ 3763429310, 3763337386, 3767720819 ],\n \"samples_ts\": [ 34.0115, 34.0124, 33.9728 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:38:38Z\",\n \"avg_ns\": 10531369681,\n \"stddev_ns\": 162725865,\n \"avg_ts\": 12.156103,\n \"stddev_ts\": 0.188192,\n \"samples_ns\": [ 10362644431, 10544118562, 10687346052 ],\n \"samples_ts\": [ 12.3521, 12.1395, 11.9768 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T04:38:23Z", + "avg_ns": 3764829171, + "stddev_ns": 2506164, + "avg_ts": 33.998897, + "stddev_ts": 0.02261, + "samples_ns": [ + 3763429310, + 3763337386, + 3767720819 + ], + "samples_ts": [ + 34.0115, + 34.0124, + 33.9728 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T04:38:38Z", + "avg_ns": 10531369681, + "stddev_ns": 162725865, + "avg_ts": 12.156103, + "stddev_ts": 0.188192, + "samples_ns": [ + 10362644431, + 10544118562, + 10687346052 + ], + "samples_ts": [ + 12.3521, + 12.1395, + 11.9768 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 516 + }, + { + "timestamp_utc": "2025-12-09T04:41:33.916545+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:39:10Z\",\n \"avg_ns\": 3762003750,\n \"stddev_ns\": 694797,\n \"avg_ts\": 34.024422,\n \"stddev_ts\": 0.006284,\n \"samples_ns\": [ 3761962031, 3762718467, 3761330752 ],\n \"samples_ts\": [ 34.0248, 34.018, 34.0305 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:39:25Z\",\n \"avg_ns\": 42610134670,\n \"stddev_ns\": 220892511,\n \"avg_ts\": 12.016136,\n \"stddev_ts\": 0.062317,\n \"samples_ns\": [ 42383917388, 42621200566, 42825286058 ],\n \"samples_ts\": [ 12.0801, 12.0128, 11.9556 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T04:39:10Z", + "avg_ns": 3762003750, + "stddev_ns": 694797, + "avg_ts": 34.024422, + "stddev_ts": 0.006284, + "samples_ns": [ + 3761962031, + 3762718467, + 3761330752 + ], + "samples_ts": [ + 34.0248, + 34.018, + 34.0305 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T04:39:25Z", + "avg_ns": 42610134670, + "stddev_ns": 220892511, + "avg_ts": 12.016136, + "stddev_ts": 0.062317, + "samples_ns": [ + 42383917388, + 42621200566, + 42825286058 + ], + "samples_ts": [ + 12.0801, + 12.0128, + 11.9556 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 517 + }, + { + "timestamp_utc": "2025-12-09T04:43:07.242755+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:41:34Z\",\n \"avg_ns\": 15082202265,\n \"stddev_ns\": 5440786,\n \"avg_ts\": 33.947300,\n \"stddev_ts\": 0.012241,\n \"samples_ns\": [ 15088477526, 15078834629, 15079294641 ],\n \"samples_ts\": [ 33.9332, 33.9549, 33.9538 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:42:35Z\",\n \"avg_ns\": 10646438788,\n \"stddev_ns\": 176979038,\n \"avg_ts\": 12.025036,\n \"stddev_ts\": 0.201832,\n \"samples_ns\": [ 10442089835, 10746973901, 10750252630 ],\n \"samples_ts\": [ 12.2581, 11.9103, 11.9067 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T04:41:34Z", + "avg_ns": 15082202265, + "stddev_ns": 5440786, + "avg_ts": 33.9473, + "stddev_ts": 0.012241, + "samples_ns": [ + 15088477526, + 15078834629, + 15079294641 + ], + "samples_ts": [ + 33.9332, + 33.9549, + 33.9538 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T04:42:35Z", + "avg_ns": 10646438788, + "stddev_ns": 176979038, + "avg_ts": 12.025036, + "stddev_ts": 0.201832, + "samples_ns": [ + 10442089835, + 10746973901, + 10750252630 + ], + "samples_ts": [ + 12.2581, + 11.9103, + 11.9067 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 518 + }, + { + "timestamp_utc": "2025-12-09T04:46:18.315048+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:43:08Z\",\n \"avg_ns\": 15086584533,\n \"stddev_ns\": 1441814,\n \"avg_ts\": 33.937436,\n \"stddev_ts\": 0.003232,\n \"samples_ns\": [ 15087522767, 15084930709, 15087300124 ],\n \"samples_ts\": [ 33.9353, 33.9412, 33.9358 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:44:08Z\",\n \"avg_ns\": 43220025926,\n \"stddev_ns\": 129733850,\n \"avg_ts\": 11.846432,\n \"stddev_ts\": 0.035621,\n \"samples_ns\": [ 43070322037, 43299614017, 43290141724 ],\n \"samples_ts\": [ 11.8875, 11.8246, 11.8272 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T04:43:08Z", + "avg_ns": 15086584533, + "stddev_ns": 1441814, + "avg_ts": 33.937436, + "stddev_ts": 0.003232, + "samples_ns": [ + 15087522767, + 15084930709, + 15087300124 + ], + "samples_ts": [ + 33.9353, + 33.9412, + 33.9358 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T04:44:08Z", + "avg_ns": 43220025926, + "stddev_ns": 129733850, + "avg_ts": 11.846432, + "stddev_ts": 0.035621, + "samples_ns": [ + 43070322037, + 43299614017, + 43290141724 + ], + "samples_ts": [ + 11.8875, + 11.8246, + 11.8272 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 519 + }, + { + "timestamp_utc": "2025-12-09T04:47:06.318002+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:46:19Z\",\n \"avg_ns\": 3762632524,\n \"stddev_ns\": 566078,\n \"avg_ts\": 34.018736,\n \"stddev_ts\": 0.005118,\n \"samples_ns\": [ 3762012270, 3763121282, 3762764020 ],\n \"samples_ts\": [ 34.0243, 34.0143, 34.0175 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:46:34Z\",\n \"avg_ns\": 10631235472,\n \"stddev_ns\": 144000615,\n \"avg_ts\": 12.041477,\n \"stddev_ts\": 0.164388,\n \"samples_ns\": [ 10464960445, 10713580401, 10715165571 ],\n \"samples_ts\": [ 12.2313, 11.9475, 11.9457 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T04:46:19Z", + "avg_ns": 3762632524, + "stddev_ns": 566078, + "avg_ts": 34.018736, + "stddev_ts": 0.005118, + "samples_ns": [ + 3762012270, + 3763121282, + 3762764020 + ], + "samples_ts": [ + 34.0243, + 34.0143, + 34.0175 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T04:46:34Z", + "avg_ns": 10631235472, + "stddev_ns": 144000615, + "avg_ts": 12.041477, + "stddev_ts": 0.164388, + "samples_ns": [ + 10464960445, + 10713580401, + 10715165571 + ], + "samples_ts": [ + 12.2313, + 11.9475, + 11.9457 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 520 + }, + { + "timestamp_utc": "2025-12-09T04:49:32.379709+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:47:07Z\",\n \"avg_ns\": 3759762027,\n \"stddev_ns\": 2889908,\n \"avg_ts\": 34.044721,\n \"stddev_ts\": 0.026146,\n \"samples_ns\": [ 3757718575, 3758500713, 3763066795 ],\n \"samples_ts\": [ 34.0632, 34.0561, 34.0148 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:47:22Z\",\n \"avg_ns\": 43311623304,\n \"stddev_ns\": 144272667,\n \"avg_ts\": 11.821395,\n \"stddev_ts\": 0.039452,\n \"samples_ns\": [ 43145437460, 43404778938, 43384653515 ],\n \"samples_ts\": [ 11.8668, 11.7959, 11.8014 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T04:47:07Z", + "avg_ns": 3759762027, + "stddev_ns": 2889908, + "avg_ts": 34.044721, + "stddev_ts": 0.026146, + "samples_ns": [ + 3757718575, + 3758500713, + 3763066795 + ], + "samples_ts": [ + 34.0632, + 34.0561, + 34.0148 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T04:47:22Z", + "avg_ns": 43311623304, + "stddev_ns": 144272667, + "avg_ts": 11.821395, + "stddev_ts": 0.039452, + "samples_ns": [ + 43145437460, + 43404778938, + 43384653515 + ], + "samples_ts": [ + 11.8668, + 11.7959, + 11.8014 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 521 + }, + { + "timestamp_utc": "2025-12-09T04:51:06.036766+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:49:33Z\",\n \"avg_ns\": 15111606763,\n \"stddev_ns\": 610246,\n \"avg_ts\": 33.881242,\n \"stddev_ts\": 0.001368,\n \"samples_ns\": [ 15112219104, 15111602553, 15110998632 ],\n \"samples_ts\": [ 33.8799, 33.8813, 33.8826 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:50:33Z\",\n \"avg_ns\": 10710884090,\n \"stddev_ns\": 65896454,\n \"avg_ts\": 11.950763,\n \"stddev_ts\": 0.073787,\n \"samples_ns\": [ 10634794728, 10749301217, 10748556326 ],\n \"samples_ts\": [ 12.036, 11.9078, 11.9086 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T04:49:33Z", + "avg_ns": 15111606763, + "stddev_ns": 610246, + "avg_ts": 33.881242, + "stddev_ts": 0.001368, + "samples_ns": [ + 15112219104, + 15111602553, + 15110998632 + ], + "samples_ts": [ + 33.8799, + 33.8813, + 33.8826 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T04:50:33Z", + "avg_ns": 10710884090, + "stddev_ns": 65896454, + "avg_ts": 11.950763, + "stddev_ts": 0.073787, + "samples_ns": [ + 10634794728, + 10749301217, + 10748556326 + ], + "samples_ts": [ + 12.036, + 11.9078, + 11.9086 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 522 + }, + { + "timestamp_utc": "2025-12-09T04:54:17.269557+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:51:06Z\",\n \"avg_ns\": 15111366432,\n \"stddev_ns\": 1116722,\n \"avg_ts\": 33.881781,\n \"stddev_ts\": 0.002489,\n \"samples_ns\": [ 15111420418, 15110230489, 15112448390 ],\n \"samples_ts\": [ 33.8817, 33.8843, 33.8794 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:52:07Z\",\n \"avg_ns\": 43238133889,\n \"stddev_ns\": 94730526,\n \"avg_ts\": 11.841437,\n \"stddev_ts\": 0.025976,\n \"samples_ns\": [ 43128927351, 43298144107, 43287330211 ],\n \"samples_ts\": [ 11.8714, 11.825, 11.8279 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T04:51:06Z", + "avg_ns": 15111366432, + "stddev_ns": 1116722, + "avg_ts": 33.881781, + "stddev_ts": 0.002489, + "samples_ns": [ + 15111420418, + 15110230489, + 15112448390 + ], + "samples_ts": [ + 33.8817, + 33.8843, + 33.8794 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T04:52:07Z", + "avg_ns": 43238133889, + "stddev_ns": 94730526, + "avg_ts": 11.841437, + "stddev_ts": 0.025976, + "samples_ns": [ + 43128927351, + 43298144107, + 43287330211 + ], + "samples_ts": [ + 11.8714, + 11.825, + 11.8279 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 523 + }, + { + "timestamp_utc": "2025-12-09T04:55:05.361239+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:54:18Z\",\n \"avg_ns\": 3760245387,\n \"stddev_ns\": 1473919,\n \"avg_ts\": 34.040335,\n \"stddev_ts\": 0.013317,\n \"samples_ns\": [ 3759166532, 3761921465, 3759648166 ],\n \"samples_ts\": [ 34.0501, 34.0252, 34.0457 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:54:33Z\",\n \"avg_ns\": 10661019609,\n \"stddev_ns\": 130518407,\n \"avg_ts\": 12.007565,\n \"stddev_ts\": 0.148050,\n \"samples_ns\": [ 10510312274, 10735645898, 10737100655 ],\n \"samples_ts\": [ 12.1785, 11.9229, 11.9213 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T04:54:18Z", + "avg_ns": 3760245387, + "stddev_ns": 1473919, + "avg_ts": 34.040335, + "stddev_ts": 0.013317, + "samples_ns": [ + 3759166532, + 3761921465, + 3759648166 + ], + "samples_ts": [ + 34.0501, + 34.0252, + 34.0457 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T04:54:33Z", + "avg_ns": 10661019609, + "stddev_ns": 130518407, + "avg_ts": 12.007565, + "stddev_ts": 0.14805, + "samples_ns": [ + 10510312274, + 10735645898, + 10737100655 + ], + "samples_ts": [ + 12.1785, + 11.9229, + 11.9213 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 524 + }, + { + "timestamp_utc": "2025-12-09T04:57:30.457876+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:55:06Z\",\n \"avg_ns\": 3757933549,\n \"stddev_ns\": 1351185,\n \"avg_ts\": 34.061276,\n \"stddev_ts\": 0.012236,\n \"samples_ns\": [ 3756452362, 3758254016, 3759094270 ],\n \"samples_ts\": [ 34.0747, 34.0584, 34.0508 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:55:21Z\",\n \"avg_ns\": 42998777287,\n \"stddev_ns\": 278150952,\n \"avg_ts\": 11.907649,\n \"stddev_ts\": 0.077317,\n \"samples_ns\": [ 42677599421, 43158162027, 43160570415 ],\n \"samples_ts\": [ 11.9969, 11.8633, 11.8627 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T04:55:06Z", + "avg_ns": 3757933549, + "stddev_ns": 1351185, + "avg_ts": 34.061276, + "stddev_ts": 0.012236, + "samples_ns": [ + 3756452362, + 3758254016, + 3759094270 + ], + "samples_ts": [ + 34.0747, + 34.0584, + 34.0508 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T04:55:21Z", + "avg_ns": 42998777287, + "stddev_ns": 278150952, + "avg_ts": 11.907649, + "stddev_ts": 0.077317, + "samples_ns": [ + 42677599421, + 43158162027, + 43160570415 + ], + "samples_ts": [ + 11.9969, + 11.8633, + 11.8627 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 525 + }, + { + "timestamp_utc": "2025-12-09T04:59:04.674708+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:57:31Z\",\n \"avg_ns\": 15274243572,\n \"stddev_ns\": 724192,\n \"avg_ts\": 33.520482,\n \"stddev_ts\": 0.001542,\n \"samples_ns\": [ 15273435021, 15274587942, 15274707755 ],\n \"samples_ts\": [ 33.5223, 33.5197, 33.5195 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:58:32Z\",\n \"avg_ns\": 10666558945,\n \"stddev_ns\": 182912866,\n \"avg_ts\": 12.002498,\n \"stddev_ts\": 0.207879,\n \"samples_ns\": [ 10455354987, 10773488086, 10770833764 ],\n \"samples_ts\": [ 12.2425, 11.881, 11.8839 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T04:57:31Z", + "avg_ns": 15274243572, + "stddev_ns": 724192, + "avg_ts": 33.520482, + "stddev_ts": 0.001542, + "samples_ns": [ + 15273435021, + 15274587942, + 15274707755 + ], + "samples_ts": [ + 33.5223, + 33.5197, + 33.5195 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T04:58:32Z", + "avg_ns": 10666558945, + "stddev_ns": 182912866, + "avg_ts": 12.002498, + "stddev_ts": 0.207879, + "samples_ns": [ + 10455354987, + 10773488086, + 10770833764 + ], + "samples_ts": [ + 12.2425, + 11.881, + 11.8839 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 526 + }, + { + "timestamp_utc": "2025-12-09T05:02:16.262563+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T04:59:05Z\",\n \"avg_ns\": 15278339741,\n \"stddev_ns\": 1949914,\n \"avg_ts\": 33.511495,\n \"stddev_ts\": 0.004277,\n \"samples_ns\": [ 15280587497, 15277102455, 15277329271 ],\n \"samples_ts\": [ 33.5066, 33.5142, 33.5137 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:00:06Z\",\n \"avg_ns\": 43134987224,\n \"stddev_ns\": 55052928,\n \"avg_ts\": 11.869728,\n \"stddev_ts\": 0.015148,\n \"samples_ns\": [ 43081013713, 43191059543, 43132888416 ],\n \"samples_ts\": [ 11.8846, 11.8543, 11.8703 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T04:59:05Z", + "avg_ns": 15278339741, + "stddev_ns": 1949914, + "avg_ts": 33.511495, + "stddev_ts": 0.004277, + "samples_ns": [ + 15280587497, + 15277102455, + 15277329271 + ], + "samples_ts": [ + 33.5066, + 33.5142, + 33.5137 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T05:00:06Z", + "avg_ns": 43134987224, + "stddev_ns": 55052928, + "avg_ts": 11.869728, + "stddev_ts": 0.015148, + "samples_ns": [ + 43081013713, + 43191059543, + 43132888416 + ], + "samples_ts": [ + 11.8846, + 11.8543, + 11.8703 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 527 + }, + { + "timestamp_utc": "2025-12-09T05:03:03.858227+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:02:17Z\",\n \"avg_ns\": 3759217820,\n \"stddev_ns\": 392828,\n \"avg_ts\": 34.049637,\n \"stddev_ts\": 0.003471,\n \"samples_ns\": [ 3759473192, 3759403001, 3758777269 ],\n \"samples_ts\": [ 34.0473, 34.048, 34.0536 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:02:32Z\",\n \"avg_ns\": 10477855518,\n \"stddev_ns\": 164447978,\n \"avg_ts\": 12.218232,\n \"stddev_ts\": 0.190321,\n \"samples_ns\": [ 10353416397, 10415860365, 10664289794 ],\n \"samples_ts\": [ 12.3631, 12.289, 12.0027 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T05:02:17Z", + "avg_ns": 3759217820, + "stddev_ns": 392828, + "avg_ts": 34.049637, + "stddev_ts": 0.003471, + "samples_ns": [ + 3759473192, + 3759403001, + 3758777269 + ], + "samples_ts": [ + 34.0473, + 34.048, + 34.0536 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T05:02:32Z", + "avg_ns": 10477855518, + "stddev_ns": 164447978, + "avg_ts": 12.218232, + "stddev_ts": 0.190321, + "samples_ns": [ + 10353416397, + 10415860365, + 10664289794 + ], + "samples_ts": [ + 12.3631, + 12.289, + 12.0027 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 528 + }, + { + "timestamp_utc": "2025-12-09T05:05:27.207370+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:03:04Z\",\n \"avg_ns\": 3762337247,\n \"stddev_ns\": 924120,\n \"avg_ts\": 34.021407,\n \"stddev_ts\": 0.008320,\n \"samples_ns\": [ 3763214355, 3761379573, 3762417815 ],\n \"samples_ts\": [ 34.0135, 34.0301, 34.0207 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:03:19Z\",\n \"avg_ns\": 42413161674,\n \"stddev_ns\": 298294811,\n \"avg_ts\": 12.072122,\n \"stddev_ts\": 0.084815,\n \"samples_ns\": [ 42131476031, 42382337103, 42725671888 ],\n \"samples_ts\": [ 12.1524, 12.0805, 11.9834 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T05:03:04Z", + "avg_ns": 3762337247, + "stddev_ns": 924120, + "avg_ts": 34.021407, + "stddev_ts": 0.00832, + "samples_ns": [ + 3763214355, + 3761379573, + 3762417815 + ], + "samples_ts": [ + 34.0135, + 34.0301, + 34.0207 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T05:03:19Z", + "avg_ns": 42413161674, + "stddev_ns": 298294811, + "avg_ts": 12.072122, + "stddev_ts": 0.084815, + "samples_ns": [ + 42131476031, + 42382337103, + 42725671888 + ], + "samples_ts": [ + 12.1524, + 12.0805, + 11.9834 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 529 + }, + { + "timestamp_utc": "2025-12-09T05:07:00.399062+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:05:28Z\",\n \"avg_ns\": 15081336354,\n \"stddev_ns\": 3347948,\n \"avg_ts\": 33.949247,\n \"stddev_ts\": 0.007532,\n \"samples_ns\": [ 15084615178, 15081466310, 15077927575 ],\n \"samples_ts\": [ 33.9419, 33.949, 33.9569 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:06:28Z\",\n \"avg_ns\": 10602291688,\n \"stddev_ns\": 212270070,\n \"avg_ts\": 12.076111,\n \"stddev_ts\": 0.243478,\n \"samples_ns\": [ 10369017332, 10653768546, 10784089188 ],\n \"samples_ts\": [ 12.3445, 12.0145, 11.8693 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T05:05:28Z", + "avg_ns": 15081336354, + "stddev_ns": 3347948, + "avg_ts": 33.949247, + "stddev_ts": 0.007532, + "samples_ns": [ + 15084615178, + 15081466310, + 15077927575 + ], + "samples_ts": [ + 33.9419, + 33.949, + 33.9569 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T05:06:28Z", + "avg_ns": 10602291688, + "stddev_ns": 212270070, + "avg_ts": 12.076111, + "stddev_ts": 0.243478, + "samples_ns": [ + 10369017332, + 10653768546, + 10784089188 + ], + "samples_ts": [ + 12.3445, + 12.0145, + 11.8693 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 530 + }, + { + "timestamp_utc": "2025-12-09T05:10:10.534263+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:07:01Z\",\n \"avg_ns\": 15081739491,\n \"stddev_ns\": 1760383,\n \"avg_ts\": 33.948339,\n \"stddev_ts\": 0.003943,\n \"samples_ns\": [ 15080836652, 15083758536, 15080623287 ],\n \"samples_ts\": [ 33.9504, 33.9438, 33.9509 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:08:01Z\",\n \"avg_ns\": 42914346916,\n \"stddev_ns\": 367541821,\n \"avg_ts\": 11.931328,\n \"stddev_ts\": 0.102672,\n \"samples_ns\": [ 43089311562, 43161718942, 42492010246 ],\n \"samples_ts\": [ 11.8823, 11.8624, 12.0493 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T05:07:01Z", + "avg_ns": 15081739491, + "stddev_ns": 1760383, + "avg_ts": 33.948339, + "stddev_ts": 0.003943, + "samples_ns": [ + 15080836652, + 15083758536, + 15080623287 + ], + "samples_ts": [ + 33.9504, + 33.9438, + 33.9509 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T05:08:01Z", + "avg_ns": 42914346916, + "stddev_ns": 367541821, + "avg_ts": 11.931328, + "stddev_ts": 0.102672, + "samples_ns": [ + 43089311562, + 43161718942, + 42492010246 + ], + "samples_ts": [ + 11.8823, + 11.8624, + 12.0493 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 531 + }, + { + "timestamp_utc": "2025-12-09T05:10:57.819355+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:10:11Z\",\n \"avg_ns\": 3772660782,\n \"stddev_ns\": 16797066,\n \"avg_ts\": 33.928756,\n \"stddev_ts\": 0.150677,\n \"samples_ns\": [ 3792035224, 3763755173, 3762191950 ],\n \"samples_ts\": [ 33.755, 34.0086, 34.0227 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:10:26Z\",\n \"avg_ns\": 10363949865,\n \"stddev_ns\": 45247028,\n \"avg_ts\": 12.350660,\n \"stddev_ts\": 0.053786,\n \"samples_ns\": [ 10339309925, 10336370604, 10416169066 ],\n \"samples_ts\": [ 12.3799, 12.3835, 12.2886 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T05:10:11Z", + "avg_ns": 3772660782, + "stddev_ns": 16797066, + "avg_ts": 33.928756, + "stddev_ts": 0.150677, + "samples_ns": [ + 3792035224, + 3763755173, + 3762191950 + ], + "samples_ts": [ + 33.755, + 34.0086, + 34.0227 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T05:10:26Z", + "avg_ns": 10363949865, + "stddev_ns": 45247028, + "avg_ts": 12.35066, + "stddev_ts": 0.053786, + "samples_ns": [ + 10339309925, + 10336370604, + 10416169066 + ], + "samples_ts": [ + 12.3799, + 12.3835, + 12.2886 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 532 + }, + { + "timestamp_utc": "2025-12-09T05:13:21.696429+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:10:58Z\",\n \"avg_ns\": 3759815322,\n \"stddev_ns\": 416733,\n \"avg_ts\": 34.044226,\n \"stddev_ts\": 0.003732,\n \"samples_ns\": [ 3760263025, 3759451547, 3759731395 ],\n \"samples_ts\": [ 34.0402, 34.0475, 34.045 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:11:13Z\",\n \"avg_ns\": 42590016430,\n \"stddev_ns\": 3060016973,\n \"avg_ts\": 12.022217,\n \"stddev_ts\": 0.105799,\n \"samples_ns\": [ 42208762938, 42603679657, 42957606697 ],\n \"samples_ts\": [ 12.1302, 12.0177, 11.9187 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T05:10:58Z", + "avg_ns": 3759815322, + "stddev_ns": 416733, + "avg_ts": 34.044226, + "stddev_ts": 0.003732, + "samples_ns": [ + 3760263025, + 3759451547, + 3759731395 + ], + "samples_ts": [ + 34.0402, + 34.0475, + 34.045 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T05:11:13Z", + "avg_ns": 42590016430, + "stddev_ns": 3060016973, + "avg_ts": 12.022217, + "stddev_ts": 0.105799, + "samples_ns": [ + 42208762938, + 42603679657, + 42957606697 + ], + "samples_ts": [ + 12.1302, + 12.0177, + 11.9187 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 533 + }, + { + "timestamp_utc": "2025-12-09T05:14:55.197399+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:13:22Z\",\n \"avg_ns\": 15100824608,\n \"stddev_ns\": 2851873,\n \"avg_ts\": 33.905434,\n \"stddev_ts\": 0.006398,\n \"samples_ns\": [ 15101511103, 15097694853, 15103267869 ],\n \"samples_ts\": [ 33.9039, 33.9125, 33.8999 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:14:23Z\",\n \"avg_ns\": 10676888726,\n \"stddev_ns\": 81842815,\n \"avg_ts\": 11.988983,\n \"stddev_ts\": 0.092306,\n \"samples_ns\": [ 10582466391, 10720699378, 10727500409 ],\n \"samples_ts\": [ 12.0955, 11.9395, 11.932 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T05:13:22Z", + "avg_ns": 15100824608, + "stddev_ns": 2851873, + "avg_ts": 33.905434, + "stddev_ts": 0.006398, + "samples_ns": [ + 15101511103, + 15097694853, + 15103267869 + ], + "samples_ts": [ + 33.9039, + 33.9125, + 33.8999 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T05:14:23Z", + "avg_ns": 10676888726, + "stddev_ns": 81842815, + "avg_ts": 11.988983, + "stddev_ts": 0.092306, + "samples_ns": [ + 10582466391, + 10720699378, + 10727500409 + ], + "samples_ts": [ + 12.0955, + 11.9395, + 11.932 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 534 + }, + { + "timestamp_utc": "2025-12-09T05:18:06.241632+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:14:56Z\",\n \"avg_ns\": 15098116843,\n \"stddev_ns\": 1392580,\n \"avg_ts\": 33.911514,\n \"stddev_ts\": 0.003128,\n \"samples_ns\": [ 15097842481, 15099626185, 15096881863 ],\n \"samples_ts\": [ 33.9121, 33.9081, 33.9143 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:15:56Z\",\n \"avg_ns\": 43189687554,\n \"stddev_ns\": 84840601,\n \"avg_ts\": 11.854712,\n \"stddev_ts\": 0.023311,\n \"samples_ns\": [ 43092849454, 43250937351, 43225275859 ],\n \"samples_ts\": [ 11.8813, 11.8379, 11.8449 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T05:14:56Z", + "avg_ns": 15098116843, + "stddev_ns": 1392580, + "avg_ts": 33.911514, + "stddev_ts": 0.003128, + "samples_ns": [ + 15097842481, + 15099626185, + 15096881863 + ], + "samples_ts": [ + 33.9121, + 33.9081, + 33.9143 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T05:15:56Z", + "avg_ns": 43189687554, + "stddev_ns": 84840601, + "avg_ts": 11.854712, + "stddev_ts": 0.023311, + "samples_ns": [ + 43092849454, + 43250937351, + 43225275859 + ], + "samples_ts": [ + 11.8813, + 11.8379, + 11.8449 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 535 + }, + { + "timestamp_utc": "2025-12-09T05:18:54.402250+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:18:07Z\",\n \"avg_ns\": 3759869937,\n \"stddev_ns\": 1358863,\n \"avg_ts\": 34.043734,\n \"stddev_ts\": 0.012290,\n \"samples_ns\": [ 3758718601, 3759524428, 3761366783 ],\n \"samples_ts\": [ 34.0542, 34.0469, 34.0302 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:18:22Z\",\n \"avg_ns\": 10682389795,\n \"stddev_ns\": 155059248,\n \"avg_ts\": 11.984034,\n \"stddev_ts\": 0.175327,\n \"samples_ns\": [ 10504683144, 10752305874, 10790180368 ],\n \"samples_ts\": [ 12.185, 11.9044, 11.8626 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T05:18:07Z", + "avg_ns": 3759869937, + "stddev_ns": 1358863, + "avg_ts": 34.043734, + "stddev_ts": 0.01229, + "samples_ns": [ + 3758718601, + 3759524428, + 3761366783 + ], + "samples_ts": [ + 34.0542, + 34.0469, + 34.0302 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T05:18:22Z", + "avg_ns": 10682389795, + "stddev_ns": 155059248, + "avg_ts": 11.984034, + "stddev_ts": 0.175327, + "samples_ns": [ + 10504683144, + 10752305874, + 10790180368 + ], + "samples_ts": [ + 12.185, + 11.9044, + 11.8626 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 536 + }, + { + "timestamp_utc": "2025-12-09T05:21:19.216400+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:18:55Z\",\n \"avg_ns\": 3763140115,\n \"stddev_ns\": 469666,\n \"avg_ts\": 34.014147,\n \"stddev_ts\": 0.004209,\n \"samples_ns\": [ 3762820886, 3762925039, 3763674421 ],\n \"samples_ts\": [ 34.017, 34.0161, 34.0093 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:19:10Z\",\n \"avg_ns\": 42898808304,\n \"stddev_ns\": 204236236,\n \"avg_ts\": 11.935244,\n \"stddev_ts\": 0.056902,\n \"samples_ns\": [ 42676971740, 42940417436, 43079035738 ],\n \"samples_ts\": [ 11.9971, 11.9235, 11.8851 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T05:18:55Z", + "avg_ns": 3763140115, + "stddev_ns": 469666, + "avg_ts": 34.014147, + "stddev_ts": 0.004209, + "samples_ns": [ + 3762820886, + 3762925039, + 3763674421 + ], + "samples_ts": [ + 34.017, + 34.0161, + 34.0093 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T05:19:10Z", + "avg_ns": 42898808304, + "stddev_ns": 204236236, + "avg_ts": 11.935244, + "stddev_ts": 0.056902, + "samples_ns": [ + 42676971740, + 42940417436, + 43079035738 + ], + "samples_ts": [ + 11.9971, + 11.9235, + 11.8851 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 537 + }, + { + "timestamp_utc": "2025-12-09T05:22:53.614707+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:21:20Z\",\n \"avg_ns\": 15316823141,\n \"stddev_ns\": 1227206,\n \"avg_ts\": 33.427297,\n \"stddev_ts\": 0.002678,\n \"samples_ns\": [ 15318069326, 15316784259, 15315615838 ],\n \"samples_ts\": [ 33.4246, 33.4274, 33.4299 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:22:21Z\",\n \"avg_ns\": 10686383103,\n \"stddev_ns\": 167745521,\n \"avg_ts\": 11.979846,\n \"stddev_ts\": 0.189760,\n \"samples_ns\": [ 10492802958, 10777376138, 10788970213 ],\n \"samples_ts\": [ 12.1988, 11.8767, 11.864 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T05:21:20Z", + "avg_ns": 15316823141, + "stddev_ns": 1227206, + "avg_ts": 33.427297, + "stddev_ts": 0.002678, + "samples_ns": [ + 15318069326, + 15316784259, + 15315615838 + ], + "samples_ts": [ + 33.4246, + 33.4274, + 33.4299 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T05:22:21Z", + "avg_ns": 10686383103, + "stddev_ns": 167745521, + "avg_ts": 11.979846, + "stddev_ts": 0.18976, + "samples_ns": [ + 10492802958, + 10777376138, + 10788970213 + ], + "samples_ts": [ + 12.1988, + 11.8767, + 11.864 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 538 + }, + { + "timestamp_utc": "2025-12-09T05:26:05.652975+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:22:54Z\",\n \"avg_ns\": 15321065405,\n \"stddev_ns\": 2466756,\n \"avg_ts\": 33.418042,\n \"stddev_ts\": 0.005381,\n \"samples_ns\": [ 15323272318, 15318402449, 15321521448 ],\n \"samples_ts\": [ 33.4132, 33.4239, 33.417 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:23:55Z\",\n \"avg_ns\": 43228220709,\n \"stddev_ns\": 78932446,\n \"avg_ts\": 11.844141,\n \"stddev_ts\": 0.021650,\n \"samples_ns\": [ 43137077697, 43273828654, 43273755777 ],\n \"samples_ts\": [ 11.8691, 11.8316, 11.8317 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T05:22:54Z", + "avg_ns": 15321065405, + "stddev_ns": 2466756, + "avg_ts": 33.418042, + "stddev_ts": 0.005381, + "samples_ns": [ + 15323272318, + 15318402449, + 15321521448 + ], + "samples_ts": [ + 33.4132, + 33.4239, + 33.417 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T05:23:55Z", + "avg_ns": 43228220709, + "stddev_ns": 78932446, + "avg_ts": 11.844141, + "stddev_ts": 0.02165, + "samples_ns": [ + 43137077697, + 43273828654, + 43273755777 + ], + "samples_ts": [ + 11.8691, + 11.8316, + 11.8317 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 539 + }, + { + "timestamp_utc": "2025-12-09T05:26:54.795629+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:26:06Z\",\n \"avg_ns\": 2921545650,\n \"stddev_ns\": 35050367,\n \"avg_ts\": 43.816659,\n \"stddev_ts\": 0.529338,\n \"samples_ns\": [ 2881080432, 2942451200, 2941105318 ],\n \"samples_ts\": [ 44.4278, 43.5011, 43.5211 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:26:18Z\",\n \"avg_ns\": 12139360331,\n \"stddev_ns\": 91029410,\n \"avg_ts\": 10.544610,\n \"stddev_ts\": 0.079364,\n \"samples_ns\": [ 12036004203, 12174471123, 12207605668 ],\n \"samples_ts\": [ 10.6348, 10.5138, 10.4853 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T05:26:06Z", + "avg_ns": 2921545650, + "stddev_ns": 35050367, + "avg_ts": 43.816659, + "stddev_ts": 0.529338, + "samples_ns": [ + 2881080432, + 2942451200, + 2941105318 + ], + "samples_ts": [ + 44.4278, + 43.5011, + 43.5211 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T05:26:18Z", + "avg_ns": 12139360331, + "stddev_ns": 91029410, + "avg_ts": 10.54461, + "stddev_ts": 0.079364, + "samples_ns": [ + 12036004203, + 12174471123, + 12207605668 + ], + "samples_ts": [ + 10.6348, + 10.5138, + 10.4853 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 540 + }, + { + "timestamp_utc": "2025-12-09T05:29:34.128349+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:26:55Z\",\n \"avg_ns\": 2915086924,\n \"stddev_ns\": 67260496,\n \"avg_ts\": 43.924882,\n \"stddev_ts\": 1.000166,\n \"samples_ns\": [ 2876510205, 2992752069, 2875998499 ],\n \"samples_ts\": [ 44.4984, 42.77, 44.5063 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:27:07Z\",\n \"avg_ns\": 48873802036,\n \"stddev_ns\": 192304198,\n \"avg_ts\": 10.476068,\n \"stddev_ts\": 0.041191,\n \"samples_ns\": [ 48694410578, 48850162708, 49076832824 ],\n \"samples_ts\": [ 10.5146, 10.481, 10.4326 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T05:26:55Z", + "avg_ns": 2915086924, + "stddev_ns": 67260496, + "avg_ts": 43.924882, + "stddev_ts": 1.000166, + "samples_ns": [ + 2876510205, + 2992752069, + 2875998499 + ], + "samples_ts": [ + 44.4984, + 42.77, + 44.5063 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T05:27:07Z", + "avg_ns": 48873802036, + "stddev_ns": 192304198, + "avg_ts": 10.476068, + "stddev_ts": 0.041191, + "samples_ns": [ + 48694410578, + 48850162708, + 49076832824 + ], + "samples_ts": [ + 10.5146, + 10.481, + 10.4326 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 541 + }, + { + "timestamp_utc": "2025-12-09T05:30:58.599470+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:29:35Z\",\n \"avg_ns\": 11774553163,\n \"stddev_ns\": 145344713,\n \"avg_ts\": 43.488019,\n \"stddev_ts\": 0.536614,\n \"samples_ns\": [ 11631006155, 11921631079, 11771022257 ],\n \"samples_ts\": [ 44.0203, 42.9471, 43.4966 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:30:22Z\",\n \"avg_ns\": 12136082242,\n \"stddev_ns\": 9053466,\n \"avg_ts\": 10.547065,\n \"stddev_ts\": 0.007866,\n \"samples_ns\": [ 12128051175, 12145892566, 12134302986 ],\n \"samples_ts\": [ 10.554, 10.5385, 10.5486 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T05:29:35Z", + "avg_ns": 11774553163, + "stddev_ns": 145344713, + "avg_ts": 43.488019, + "stddev_ts": 0.536614, + "samples_ns": [ + 11631006155, + 11921631079, + 11771022257 + ], + "samples_ts": [ + 44.0203, + 42.9471, + 43.4966 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T05:30:22Z", + "avg_ns": 12136082242, + "stddev_ns": 9053466, + "avg_ts": 10.547065, + "stddev_ts": 0.007866, + "samples_ns": [ + 12128051175, + 12145892566, + 12134302986 + ], + "samples_ts": [ + 10.554, + 10.5385, + 10.5486 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 542 + }, + { + "timestamp_utc": "2025-12-09T05:34:12.610547+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:30:59Z\",\n \"avg_ns\": 11547385633,\n \"stddev_ns\": 59521618,\n \"avg_ts\": 44.339824,\n \"stddev_ts\": 0.227890,\n \"samples_ns\": [ 11508777193, 11615932764, 11517446942 ],\n \"samples_ts\": [ 44.4878, 44.0774, 44.4543 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:31:45Z\",\n \"avg_ns\": 48924148126,\n \"stddev_ns\": 58701448,\n \"avg_ts\": 10.465190,\n \"stddev_ts\": 0.012550,\n \"samples_ns\": [ 48878276803, 48990299856, 48903867721 ],\n \"samples_ts\": [ 10.475, 10.451, 10.4695 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T05:30:59Z", + "avg_ns": 11547385633, + "stddev_ns": 59521618, + "avg_ts": 44.339824, + "stddev_ts": 0.22789, + "samples_ns": [ + 11508777193, + 11615932764, + 11517446942 + ], + "samples_ts": [ + 44.4878, + 44.0774, + 44.4543 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T05:31:45Z", + "avg_ns": 48924148126, + "stddev_ns": 58701448, + "avg_ts": 10.46519, + "stddev_ts": 0.01255, + "samples_ns": [ + 48878276803, + 48990299856, + 48903867721 + ], + "samples_ts": [ + 10.475, + 10.451, + 10.4695 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 543 + }, + { + "timestamp_utc": "2025-12-09T05:35:01.661424+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:34:13Z\",\n \"avg_ns\": 2882360322,\n \"stddev_ns\": 1226033,\n \"avg_ts\": 44.408055,\n \"stddev_ts\": 0.018889,\n \"samples_ns\": [ 2883606143, 2881155084, 2882319739 ],\n \"samples_ts\": [ 44.3889, 44.4266, 44.4087 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:34:25Z\",\n \"avg_ns\": 12132645288,\n \"stddev_ns\": 51678750,\n \"avg_ts\": 10.550177,\n \"stddev_ts\": 0.045041,\n \"samples_ns\": [ 12073438821, 12168700402, 12155796642 ],\n \"samples_ts\": [ 10.6018, 10.5188, 10.53 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T05:34:13Z", + "avg_ns": 2882360322, + "stddev_ns": 1226033, + "avg_ts": 44.408055, + "stddev_ts": 0.018889, + "samples_ns": [ + 2883606143, + 2881155084, + 2882319739 + ], + "samples_ts": [ + 44.3889, + 44.4266, + 44.4087 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T05:34:25Z", + "avg_ns": 12132645288, + "stddev_ns": 51678750, + "avg_ts": 10.550177, + "stddev_ts": 0.045041, + "samples_ns": [ + 12073438821, + 12168700402, + 12155796642 + ], + "samples_ts": [ + 10.6018, + 10.5188, + 10.53 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 544 + }, + { + "timestamp_utc": "2025-12-09T05:37:41.095536+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:35:02Z\",\n \"avg_ns\": 2880932280,\n \"stddev_ns\": 2105819,\n \"avg_ts\": 44.430078,\n \"stddev_ts\": 0.032453,\n \"samples_ns\": [ 2879510982, 2883350714, 2879935145 ],\n \"samples_ts\": [ 44.452, 44.3928, 44.4454 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:35:14Z\",\n \"avg_ns\": 48940081223,\n \"stddev_ns\": 84357743,\n \"avg_ts\": 10.461793,\n \"stddev_ts\": 0.018049,\n \"samples_ns\": [ 48977705438, 48999079430, 48843458803 ],\n \"samples_ts\": [ 10.4537, 10.4492, 10.4825 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T05:35:02Z", + "avg_ns": 2880932280, + "stddev_ns": 2105819, + "avg_ts": 44.430078, + "stddev_ts": 0.032453, + "samples_ns": [ + 2879510982, + 2883350714, + 2879935145 + ], + "samples_ts": [ + 44.452, + 44.3928, + 44.4454 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T05:35:14Z", + "avg_ns": 48940081223, + "stddev_ns": 84357743, + "avg_ts": 10.461793, + "stddev_ts": 0.018049, + "samples_ns": [ + 48977705438, + 48999079430, + 48843458803 + ], + "samples_ts": [ + 10.4537, + 10.4492, + 10.4825 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 545 + }, + { + "timestamp_utc": "2025-12-09T05:39:04.909471+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:37:41Z\",\n \"avg_ns\": 11563084758,\n \"stddev_ns\": 3935651,\n \"avg_ts\": 44.278845,\n \"stddev_ts\": 0.015062,\n \"samples_ns\": [ 11564871244, 11558575914, 11565807118 ],\n \"samples_ts\": [ 44.272, 44.2961, 44.2684 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:38:28Z\",\n \"avg_ns\": 12163154640,\n \"stddev_ns\": 37326917,\n \"avg_ts\": 10.523652,\n \"stddev_ts\": 0.032351,\n \"samples_ns\": [ 12120245898, 12181083582, 12188134440 ],\n \"samples_ts\": [ 10.5608, 10.5081, 10.502 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T05:37:41Z", + "avg_ns": 11563084758, + "stddev_ns": 3935651, + "avg_ts": 44.278845, + "stddev_ts": 0.015062, + "samples_ns": [ + 11564871244, + 11558575914, + 11565807118 + ], + "samples_ts": [ + 44.272, + 44.2961, + 44.2684 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T05:38:28Z", + "avg_ns": 12163154640, + "stddev_ns": 37326917, + "avg_ts": 10.523652, + "stddev_ts": 0.032351, + "samples_ns": [ + 12120245898, + 12181083582, + 12188134440 + ], + "samples_ts": [ + 10.5608, + 10.5081, + 10.502 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 546 + }, + { + "timestamp_utc": "2025-12-09T05:42:19.595986+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:39:05Z\",\n \"avg_ns\": 11640892572,\n \"stddev_ns\": 58751538,\n \"avg_ts\": 43.983631,\n \"stddev_ts\": 0.222626,\n \"samples_ns\": [ 11573144793, 11671698828, 11677834096 ],\n \"samples_ts\": [ 44.2404, 43.8668, 43.8437 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:39:52Z\",\n \"avg_ns\": 49021398888,\n \"stddev_ns\": 35107013,\n \"avg_ts\": 10.444422,\n \"stddev_ts\": 0.007479,\n \"samples_ns\": [ 48988660475, 49017066609, 49058469582 ],\n \"samples_ts\": [ 10.4514, 10.4453, 10.4365 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T05:39:05Z", + "avg_ns": 11640892572, + "stddev_ns": 58751538, + "avg_ts": 43.983631, + "stddev_ts": 0.222626, + "samples_ns": [ + 11573144793, + 11671698828, + 11677834096 + ], + "samples_ts": [ + 44.2404, + 43.8668, + 43.8437 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T05:39:52Z", + "avg_ns": 49021398888, + "stddev_ns": 35107013, + "avg_ts": 10.444422, + "stddev_ts": 0.007479, + "samples_ns": [ + 48988660475, + 49017066609, + 49058469582 + ], + "samples_ts": [ + 10.4514, + 10.4453, + 10.4365 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 547 + }, + { + "timestamp_utc": "2025-12-09T05:43:08.544719+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:42:20Z\",\n \"avg_ns\": 2877966390,\n \"stddev_ns\": 1057074,\n \"avg_ts\": 44.475853,\n \"stddev_ts\": 0.016296,\n \"samples_ns\": [ 2876790769, 2878828219, 2878280184 ],\n \"samples_ts\": [ 44.494, 44.4625, 44.471 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:42:32Z\",\n \"avg_ns\": 12118060218,\n \"stddev_ns\": 55400774,\n \"avg_ts\": 10.562894,\n \"stddev_ts\": 0.048418,\n \"samples_ns\": [ 12054107331, 12151361353, 12148711971 ],\n \"samples_ts\": [ 10.6188, 10.5338, 10.5361 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T05:42:20Z", + "avg_ns": 2877966390, + "stddev_ns": 1057074, + "avg_ts": 44.475853, + "stddev_ts": 0.016296, + "samples_ns": [ + 2876790769, + 2878828219, + 2878280184 + ], + "samples_ts": [ + 44.494, + 44.4625, + 44.471 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T05:42:32Z", + "avg_ns": 12118060218, + "stddev_ns": 55400774, + "avg_ts": 10.562894, + "stddev_ts": 0.048418, + "samples_ns": [ + 12054107331, + 12151361353, + 12148711971 + ], + "samples_ts": [ + 10.6188, + 10.5338, + 10.5361 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 548 + }, + { + "timestamp_utc": "2025-12-09T05:45:47.774402+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:43:09Z\",\n \"avg_ns\": 2884399472,\n \"stddev_ns\": 1562127,\n \"avg_ts\": 44.376664,\n \"stddev_ts\": 0.024010,\n \"samples_ns\": [ 2885731448, 2884784130, 2882682840 ],\n \"samples_ts\": [ 44.3562, 44.3707, 44.4031 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:43:20Z\",\n \"avg_ns\": 48872137761,\n \"stddev_ns\": 61942135,\n \"avg_ts\": 10.476328,\n \"stddev_ts\": 0.013284,\n \"samples_ns\": [ 48886807727, 48925427636, 48804177921 ],\n \"samples_ts\": [ 10.4732, 10.4649, 10.4909 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T05:43:09Z", + "avg_ns": 2884399472, + "stddev_ns": 1562127, + "avg_ts": 44.376664, + "stddev_ts": 0.02401, + "samples_ns": [ + 2885731448, + 2884784130, + 2882682840 + ], + "samples_ts": [ + 44.3562, + 44.3707, + 44.4031 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T05:43:20Z", + "avg_ns": 48872137761, + "stddev_ns": 61942135, + "avg_ts": 10.476328, + "stddev_ts": 0.013284, + "samples_ns": [ + 48886807727, + 48925427636, + 48804177921 + ], + "samples_ts": [ + 10.4732, + 10.4649, + 10.4909 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 549 + }, + { + "timestamp_utc": "2025-12-09T05:47:12.459269+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:45:48Z\",\n \"avg_ns\": 11766368687,\n \"stddev_ns\": 5252526,\n \"avg_ts\": 43.513856,\n \"stddev_ts\": 0.019427,\n \"samples_ns\": [ 11767378340, 11760684625, 11771043096 ],\n \"samples_ts\": [ 43.5101, 43.5349, 43.4966 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:46:35Z\",\n \"avg_ns\": 12174559769,\n \"stddev_ns\": 30444754,\n \"avg_ts\": 10.513771,\n \"stddev_ts\": 0.026295,\n \"samples_ns\": [ 12143634401, 12204499576, 12175545331 ],\n \"samples_ts\": [ 10.5405, 10.4879, 10.5129 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T05:45:48Z", + "avg_ns": 11766368687, + "stddev_ns": 5252526, + "avg_ts": 43.513856, + "stddev_ts": 0.019427, + "samples_ns": [ + 11767378340, + 11760684625, + 11771043096 + ], + "samples_ts": [ + 43.5101, + 43.5349, + 43.4966 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T05:46:35Z", + "avg_ns": 12174559769, + "stddev_ns": 30444754, + "avg_ts": 10.513771, + "stddev_ts": 0.026295, + "samples_ns": [ + 12143634401, + 12204499576, + 12175545331 + ], + "samples_ts": [ + 10.5405, + 10.4879, + 10.5129 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 550 + }, + { + "timestamp_utc": "2025-12-09T05:50:27.684057+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:47:13Z\",\n \"avg_ns\": 11765553733,\n \"stddev_ns\": 2619860,\n \"avg_ts\": 43.516865,\n \"stddev_ts\": 0.009691,\n \"samples_ns\": [ 11762557854, 11766688077, 11767415268 ],\n \"samples_ts\": [ 43.5279, 43.5127, 43.51 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:48:00Z\",\n \"avg_ns\": 49021606135,\n \"stddev_ns\": 13104402,\n \"avg_ts\": 10.444375,\n \"stddev_ts\": 0.002792,\n \"samples_ns\": [ 49006624660, 49027255155, 49030938590 ],\n \"samples_ts\": [ 10.4476, 10.4432, 10.4424 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T05:47:13Z", + "avg_ns": 11765553733, + "stddev_ns": 2619860, + "avg_ts": 43.516865, + "stddev_ts": 0.009691, + "samples_ns": [ + 11762557854, + 11766688077, + 11767415268 + ], + "samples_ts": [ + 43.5279, + 43.5127, + 43.51 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T05:48:00Z", + "avg_ns": 49021606135, + "stddev_ns": 13104402, + "avg_ts": 10.444375, + "stddev_ts": 0.002792, + "samples_ns": [ + 49006624660, + 49027255155, + 49030938590 + ], + "samples_ts": [ + 10.4476, + 10.4432, + 10.4424 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 551 + }, + { + "timestamp_utc": "2025-12-09T05:51:16.797024+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:50:28Z\",\n \"avg_ns\": 2882791083,\n \"stddev_ns\": 595875,\n \"avg_ts\": 44.401415,\n \"stddev_ts\": 0.009102,\n \"samples_ns\": [ 2882447794, 2883473528, 2882451929 ],\n \"samples_ts\": [ 44.4067, 44.3909, 44.4066 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:50:40Z\",\n \"avg_ns\": 12164412369,\n \"stddev_ns\": 30231942,\n \"avg_ts\": 10.522541,\n \"stddev_ts\": 0.026117,\n \"samples_ns\": [ 12142821149, 12151452248, 12198963710 ],\n \"samples_ts\": [ 10.5412, 10.5337, 10.4927 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T05:50:28Z", + "avg_ns": 2882791083, + "stddev_ns": 595875, + "avg_ts": 44.401415, + "stddev_ts": 0.009102, + "samples_ns": [ + 2882447794, + 2883473528, + 2882451929 + ], + "samples_ts": [ + 44.4067, + 44.3909, + 44.4066 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T05:50:40Z", + "avg_ns": 12164412369, + "stddev_ns": 30231942, + "avg_ts": 10.522541, + "stddev_ts": 0.026117, + "samples_ns": [ + 12142821149, + 12151452248, + 12198963710 + ], + "samples_ts": [ + 10.5412, + 10.5337, + 10.4927 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 552 + }, + { + "timestamp_utc": "2025-12-09T05:53:56.493167+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:51:17Z\",\n \"avg_ns\": 2882607152,\n \"stddev_ns\": 644139,\n \"avg_ts\": 44.404248,\n \"stddev_ts\": 0.009923,\n \"samples_ns\": [ 2883115062, 2881882627, 2882823767 ],\n \"samples_ts\": [ 44.3964, 44.4154, 44.4009 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:51:29Z\",\n \"avg_ns\": 49027581029,\n \"stddev_ns\": 34037255,\n \"avg_ts\": 10.443105,\n \"stddev_ts\": 0.007251,\n \"samples_ns\": [ 48990320946, 49035385116, 49057037027 ],\n \"samples_ts\": [ 10.451, 10.4414, 10.4368 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T05:51:17Z", + "avg_ns": 2882607152, + "stddev_ns": 644139, + "avg_ts": 44.404248, + "stddev_ts": 0.009923, + "samples_ns": [ + 2883115062, + 2881882627, + 2882823767 + ], + "samples_ts": [ + 44.3964, + 44.4154, + 44.4009 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T05:51:29Z", + "avg_ns": 49027581029, + "stddev_ns": 34037255, + "avg_ts": 10.443105, + "stddev_ts": 0.007251, + "samples_ns": [ + 48990320946, + 49035385116, + 49057037027 + ], + "samples_ts": [ + 10.451, + 10.4414, + 10.4368 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 553 + }, + { + "timestamp_utc": "2025-12-09T05:55:20.104304+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:53:57Z\",\n \"avg_ns\": 11499547062,\n \"stddev_ns\": 3666340,\n \"avg_ts\": 44.523496,\n \"stddev_ts\": 0.014185,\n \"samples_ns\": [ 11502508960, 11495450884, 11500681344 ],\n \"samples_ts\": [ 44.512, 44.5394, 44.5191 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:54:43Z\",\n \"avg_ns\": 12175101908,\n \"stddev_ns\": 17624388,\n \"avg_ts\": 10.513274,\n \"stddev_ts\": 0.015218,\n \"samples_ns\": [ 12157825908, 12174424655, 12193055161 ],\n \"samples_ts\": [ 10.5282, 10.5138, 10.4978 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T05:53:57Z", + "avg_ns": 11499547062, + "stddev_ns": 3666340, + "avg_ts": 44.523496, + "stddev_ts": 0.014185, + "samples_ns": [ + 11502508960, + 11495450884, + 11500681344 + ], + "samples_ts": [ + 44.512, + 44.5394, + 44.5191 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T05:54:43Z", + "avg_ns": 12175101908, + "stddev_ns": 17624388, + "avg_ts": 10.513274, + "stddev_ts": 0.015218, + "samples_ns": [ + 12157825908, + 12174424655, + 12193055161 + ], + "samples_ts": [ + 10.5282, + 10.5138, + 10.4978 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 554 + }, + { + "timestamp_utc": "2025-12-09T05:58:34.675421+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:55:21Z\",\n \"avg_ns\": 11563471050,\n \"stddev_ns\": 58330331,\n \"avg_ts\": 44.278111,\n \"stddev_ts\": 0.222706,\n \"samples_ns\": [ 11529807481, 11630824999, 11529780671 ],\n \"samples_ts\": [ 44.4066, 44.021, 44.4067 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:56:07Z\",\n \"avg_ns\": 49085587826,\n \"stddev_ns\": 44411759,\n \"avg_ts\": 10.430766,\n \"stddev_ts\": 0.009433,\n \"samples_ns\": [ 49051731624, 49135872688, 49069159167 ],\n \"samples_ts\": [ 10.438, 10.4201, 10.4343 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T05:55:21Z", + "avg_ns": 11563471050, + "stddev_ns": 58330331, + "avg_ts": 44.278111, + "stddev_ts": 0.222706, + "samples_ns": [ + 11529807481, + 11630824999, + 11529780671 + ], + "samples_ts": [ + 44.4066, + 44.021, + 44.4067 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T05:56:07Z", + "avg_ns": 49085587826, + "stddev_ns": 44411759, + "avg_ts": 10.430766, + "stddev_ts": 0.009433, + "samples_ns": [ + 49051731624, + 49135872688, + 49069159167 + ], + "samples_ts": [ + 10.438, + 10.4201, + 10.4343 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 555 + }, + { + "timestamp_utc": "2025-12-09T05:59:23.715356+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:58:35Z\",\n \"avg_ns\": 2919528161,\n \"stddev_ns\": 66433274,\n \"avg_ts\": 43.857644,\n \"stddev_ts\": 0.985037,\n \"samples_ns\": [ 2880537428, 2996235170, 2881811885 ],\n \"samples_ts\": [ 44.4362, 42.7203, 44.4165 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:58:47Z\",\n \"avg_ns\": 12098123472,\n \"stddev_ns\": 39198365,\n \"avg_ts\": 10.580227,\n \"stddev_ts\": 0.034344,\n \"samples_ns\": [ 12052898637, 12119143581, 12122328199 ],\n \"samples_ts\": [ 10.6199, 10.5618, 10.559 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T05:58:35Z", + "avg_ns": 2919528161, + "stddev_ns": 66433274, + "avg_ts": 43.857644, + "stddev_ts": 0.985037, + "samples_ns": [ + 2880537428, + 2996235170, + 2881811885 + ], + "samples_ts": [ + 44.4362, + 42.7203, + 44.4165 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T05:58:47Z", + "avg_ns": 12098123472, + "stddev_ns": 39198365, + "avg_ts": 10.580227, + "stddev_ts": 0.034344, + "samples_ns": [ + 12052898637, + 12119143581, + 12122328199 + ], + "samples_ts": [ + 10.6199, + 10.5618, + 10.559 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 556 + }, + { + "timestamp_utc": "2025-12-09T06:02:03.012464+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:59:24Z\",\n \"avg_ns\": 2873592881,\n \"stddev_ns\": 2133442,\n \"avg_ts\": 44.543557,\n \"stddev_ts\": 0.033057,\n \"samples_ns\": [ 2875800141, 2871543363, 2873435140 ],\n \"samples_ts\": [ 44.5094, 44.5753, 44.546 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T05:59:36Z\",\n \"avg_ns\": 48908151255,\n \"stddev_ns\": 16322905,\n \"avg_ts\": 10.468603,\n \"stddev_ts\": 0.003493,\n \"samples_ns\": [ 48924694725, 48907694548, 48892064494 ],\n \"samples_ts\": [ 10.4651, 10.4687, 10.472 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T05:59:24Z", + "avg_ns": 2873592881, + "stddev_ns": 2133442, + "avg_ts": 44.543557, + "stddev_ts": 0.033057, + "samples_ns": [ + 2875800141, + 2871543363, + 2873435140 + ], + "samples_ts": [ + 44.5094, + 44.5753, + 44.546 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T05:59:36Z", + "avg_ns": 48908151255, + "stddev_ns": 16322905, + "avg_ts": 10.468603, + "stddev_ts": 0.003493, + "samples_ns": [ + 48924694725, + 48907694548, + 48892064494 + ], + "samples_ts": [ + 10.4651, + 10.4687, + 10.472 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 557 + }, + { + "timestamp_utc": "2025-12-09T06:03:26.976855+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:02:03Z\",\n \"avg_ns\": 11579963761,\n \"stddev_ns\": 4575463,\n \"avg_ts\": 44.214305,\n \"stddev_ts\": 0.017468,\n \"samples_ns\": [ 11579326289, 11584824533, 11575740461 ],\n \"samples_ts\": [ 44.2167, 44.1957, 44.2304 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:02:50Z\",\n \"avg_ns\": 12177624731,\n \"stddev_ns\": 12720486,\n \"avg_ts\": 10.511089,\n \"stddev_ts\": 0.010985,\n \"samples_ns\": [ 12162963880, 12185717391, 12184192924 ],\n \"samples_ts\": [ 10.5238, 10.5041, 10.5054 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T06:02:03Z", + "avg_ns": 11579963761, + "stddev_ns": 4575463, + "avg_ts": 44.214305, + "stddev_ts": 0.017468, + "samples_ns": [ + 11579326289, + 11584824533, + 11575740461 + ], + "samples_ts": [ + 44.2167, + 44.1957, + 44.2304 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T06:02:50Z", + "avg_ns": 12177624731, + "stddev_ns": 12720486, + "avg_ts": 10.511089, + "stddev_ts": 0.010985, + "samples_ns": [ + 12162963880, + 12185717391, + 12184192924 + ], + "samples_ts": [ + 10.5238, + 10.5041, + 10.5054 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 558 + }, + { + "timestamp_utc": "2025-12-09T06:06:41.079231+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:03:27Z\",\n \"avg_ns\": 11603472653,\n \"stddev_ns\": 61354211,\n \"avg_ts\": 44.125542,\n \"stddev_ts\": 0.232607,\n \"samples_ns\": [ 11674316838, 11567658461, 11568442661 ],\n \"samples_ts\": [ 43.857, 44.2613, 44.2583 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:04:14Z\",\n \"avg_ns\": 48882203081,\n \"stddev_ns\": 68757218,\n \"avg_ts\": 10.474173,\n \"stddev_ts\": 0.014727,\n \"samples_ns\": [ 48869435153, 48820725118, 48956448973 ],\n \"samples_ts\": [ 10.4769, 10.4873, 10.4583 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T06:03:27Z", + "avg_ns": 11603472653, + "stddev_ns": 61354211, + "avg_ts": 44.125542, + "stddev_ts": 0.232607, + "samples_ns": [ + 11674316838, + 11567658461, + 11568442661 + ], + "samples_ts": [ + 43.857, + 44.2613, + 44.2583 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T06:04:14Z", + "avg_ns": 48882203081, + "stddev_ns": 68757218, + "avg_ts": 10.474173, + "stddev_ts": 0.014727, + "samples_ns": [ + 48869435153, + 48820725118, + 48956448973 + ], + "samples_ts": [ + 10.4769, + 10.4873, + 10.4583 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 559 + }, + { + "timestamp_utc": "2025-12-09T06:07:29.928381+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:06:41Z\",\n \"avg_ns\": 2878408762,\n \"stddev_ns\": 2143859,\n \"avg_ts\": 44.469031,\n \"stddev_ts\": 0.033090,\n \"samples_ns\": [ 2876691457, 2880809661, 2877725170 ],\n \"samples_ts\": [ 44.4956, 44.432, 44.4796 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:06:53Z\",\n \"avg_ns\": 12084120045,\n \"stddev_ns\": 66472282,\n \"avg_ts\": 10.592628,\n \"stddev_ts\": 0.058416,\n \"samples_ns\": [ 12009148432, 12135853912, 12107357791 ],\n \"samples_ts\": [ 10.6585, 10.5473, 10.5721 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T06:06:41Z", + "avg_ns": 2878408762, + "stddev_ns": 2143859, + "avg_ts": 44.469031, + "stddev_ts": 0.03309, + "samples_ns": [ + 2876691457, + 2880809661, + 2877725170 + ], + "samples_ts": [ + 44.4956, + 44.432, + 44.4796 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T06:06:53Z", + "avg_ns": 12084120045, + "stddev_ns": 66472282, + "avg_ts": 10.592628, + "stddev_ts": 0.058416, + "samples_ns": [ + 12009148432, + 12135853912, + 12107357791 + ], + "samples_ts": [ + 10.6585, + 10.5473, + 10.5721 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 560 + }, + { + "timestamp_utc": "2025-12-09T06:10:08.572095+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:07:30Z\",\n \"avg_ns\": 2880935926,\n \"stddev_ns\": 1802024,\n \"avg_ts\": 44.430017,\n \"stddev_ts\": 0.027757,\n \"samples_ns\": [ 2879758916, 2883008519, 2880040345 ],\n \"samples_ts\": [ 44.4482, 44.3981, 44.4438 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:07:42Z\",\n \"avg_ns\": 48662230236,\n \"stddev_ns\": 565559318,\n \"avg_ts\": 10.522461,\n \"stddev_ts\": 0.123114,\n \"samples_ns\": [ 49010171994, 48966861169, 48009657546 ],\n \"samples_ts\": [ 10.4468, 10.4561, 10.6645 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T06:07:30Z", + "avg_ns": 2880935926, + "stddev_ns": 1802024, + "avg_ts": 44.430017, + "stddev_ts": 0.027757, + "samples_ns": [ + 2879758916, + 2883008519, + 2880040345 + ], + "samples_ts": [ + 44.4482, + 44.3981, + 44.4438 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T06:07:42Z", + "avg_ns": 48662230236, + "stddev_ns": 565559318, + "avg_ts": 10.522461, + "stddev_ts": 0.123114, + "samples_ns": [ + 49010171994, + 48966861169, + 48009657546 + ], + "samples_ts": [ + 10.4468, + 10.4561, + 10.6645 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 561 + }, + { + "timestamp_utc": "2025-12-09T06:11:33.172649+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:10:09Z\",\n \"avg_ns\": 11738500157,\n \"stddev_ns\": 3027254,\n \"avg_ts\": 43.617159,\n \"stddev_ts\": 0.011249,\n \"samples_ns\": [ 11735270198, 11738957643, 11741272630 ],\n \"samples_ts\": [ 43.6292, 43.6155, 43.6069 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:10:56Z\",\n \"avg_ns\": 12175398358,\n \"stddev_ns\": 26968178,\n \"avg_ts\": 10.513037,\n \"stddev_ts\": 0.023298,\n \"samples_ns\": [ 12146569064, 12200006735, 12179619277 ],\n \"samples_ts\": [ 10.538, 10.4918, 10.5094 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T06:10:09Z", + "avg_ns": 11738500157, + "stddev_ns": 3027254, + "avg_ts": 43.617159, + "stddev_ts": 0.011249, + "samples_ns": [ + 11735270198, + 11738957643, + 11741272630 + ], + "samples_ts": [ + 43.6292, + 43.6155, + 43.6069 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T06:10:56Z", + "avg_ns": 12175398358, + "stddev_ns": 26968178, + "avg_ts": 10.513037, + "stddev_ts": 0.023298, + "samples_ns": [ + 12146569064, + 12200006735, + 12179619277 + ], + "samples_ts": [ + 10.538, + 10.4918, + 10.5094 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 562 + }, + { + "timestamp_utc": "2025-12-09T06:14:48.409003+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:11:34Z\",\n \"avg_ns\": 11746512377,\n \"stddev_ns\": 4840018,\n \"avg_ts\": 43.587411,\n \"stddev_ts\": 0.017959,\n \"samples_ns\": [ 11749974967, 11748578698, 11740983467 ],\n \"samples_ts\": [ 43.5746, 43.5797, 43.6079 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:12:21Z\",\n \"avg_ns\": 49054479235,\n \"stddev_ns\": 50063627,\n \"avg_ts\": 10.437382,\n \"stddev_ts\": 0.010658,\n \"samples_ns\": [ 48996684407, 49082310747, 49084442552 ],\n \"samples_ts\": [ 10.4497, 10.4315, 10.431 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T06:11:34Z", + "avg_ns": 11746512377, + "stddev_ns": 4840018, + "avg_ts": 43.587411, + "stddev_ts": 0.017959, + "samples_ns": [ + 11749974967, + 11748578698, + 11740983467 + ], + "samples_ts": [ + 43.5746, + 43.5797, + 43.6079 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T06:12:21Z", + "avg_ns": 49054479235, + "stddev_ns": 50063627, + "avg_ts": 10.437382, + "stddev_ts": 0.010658, + "samples_ns": [ + 48996684407, + 49082310747, + 49084442552 + ], + "samples_ts": [ + 10.4497, + 10.4315, + 10.431 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 563 + }, + { + "timestamp_utc": "2025-12-09T06:15:37.421177+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:14:49Z\",\n \"avg_ns\": 2875780043,\n \"stddev_ns\": 1150358,\n \"avg_ts\": 44.509668,\n \"stddev_ts\": 0.017781,\n \"samples_ns\": [ 2875023259, 2877102314, 2875214557 ],\n \"samples_ts\": [ 44.5214, 44.4892, 44.5184 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:15:00Z\",\n \"avg_ns\": 12127396473,\n \"stddev_ns\": 40612072,\n \"avg_ts\": 10.554694,\n \"stddev_ts\": 0.035378,\n \"samples_ns\": [ 12083485853, 12163606930, 12135096638 ],\n \"samples_ts\": [ 10.593, 10.5232, 10.5479 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T06:14:49Z", + "avg_ns": 2875780043, + "stddev_ns": 1150358, + "avg_ts": 44.509668, + "stddev_ts": 0.017781, + "samples_ns": [ + 2875023259, + 2877102314, + 2875214557 + ], + "samples_ts": [ + 44.5214, + 44.4892, + 44.5184 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T06:15:00Z", + "avg_ns": 12127396473, + "stddev_ns": 40612072, + "avg_ts": 10.554694, + "stddev_ts": 0.035378, + "samples_ns": [ + 12083485853, + 12163606930, + 12135096638 + ], + "samples_ts": [ + 10.593, + 10.5232, + 10.5479 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 564 + }, + { + "timestamp_utc": "2025-12-09T06:18:17.036303+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:15:38Z\",\n \"avg_ns\": 2885917384,\n \"stddev_ns\": 5242436,\n \"avg_ts\": 44.353412,\n \"stddev_ts\": 0.080637,\n \"samples_ns\": [ 2887718542, 2880011806, 2890021804 ],\n \"samples_ts\": [ 44.3256, 44.4443, 44.2903 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:15:49Z\",\n \"avg_ns\": 48994479363,\n \"stddev_ns\": 88374234,\n \"avg_ts\": 10.450180,\n \"stddev_ts\": 0.018868,\n \"samples_ns\": [ 48893149190, 49055587354, 49034701547 ],\n \"samples_ts\": [ 10.4718, 10.4371, 10.4416 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T06:15:38Z", + "avg_ns": 2885917384, + "stddev_ns": 5242436, + "avg_ts": 44.353412, + "stddev_ts": 0.080637, + "samples_ns": [ + 2887718542, + 2880011806, + 2890021804 + ], + "samples_ts": [ + 44.3256, + 44.4443, + 44.2903 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T06:15:49Z", + "avg_ns": 48994479363, + "stddev_ns": 88374234, + "avg_ts": 10.45018, + "stddev_ts": 0.018868, + "samples_ns": [ + 48893149190, + 49055587354, + 49034701547 + ], + "samples_ts": [ + 10.4718, + 10.4371, + 10.4416 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 565 + }, + { + "timestamp_utc": "2025-12-09T06:19:40.720273+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:18:17Z\",\n \"avg_ns\": 11514193148,\n \"stddev_ns\": 5817262,\n \"avg_ts\": 44.466866,\n \"stddev_ts\": 0.022472,\n \"samples_ns\": [ 11518474457, 11507569934, 11516535053 ],\n \"samples_ts\": [ 44.4503, 44.4925, 44.4578 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:19:04Z\",\n \"avg_ns\": 12179677624,\n \"stddev_ns\": 26053213,\n \"avg_ts\": 10.509341,\n \"stddev_ts\": 0.022471,\n \"samples_ns\": [ 12155401132, 12207202841, 12176428899 ],\n \"samples_ts\": [ 10.5303, 10.4856, 10.5121 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T06:18:17Z", + "avg_ns": 11514193148, + "stddev_ns": 5817262, + "avg_ts": 44.466866, + "stddev_ts": 0.022472, + "samples_ns": [ + 11518474457, + 11507569934, + 11516535053 + ], + "samples_ts": [ + 44.4503, + 44.4925, + 44.4578 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T06:19:04Z", + "avg_ns": 12179677624, + "stddev_ns": 26053213, + "avg_ts": 10.509341, + "stddev_ts": 0.022471, + "samples_ns": [ + 12155401132, + 12207202841, + 12176428899 + ], + "samples_ts": [ + 10.5303, + 10.4856, + 10.5121 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 566 + }, + { + "timestamp_utc": "2025-12-09T06:22:54.683687+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:19:41Z\",\n \"avg_ns\": 11518750574,\n \"stddev_ns\": 2457517,\n \"avg_ts\": 44.449267,\n \"stddev_ts\": 0.009465,\n \"samples_ns\": [ 11518543226, 11521300493, 11516408005 ],\n \"samples_ts\": [ 44.4501, 44.4394, 44.4583 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:20:27Z\",\n \"avg_ns\": 48913943995,\n \"stddev_ns\": 18014583,\n \"avg_ts\": 10.467364,\n \"stddev_ts\": 0.003854,\n \"samples_ns\": [ 48897860622, 48910563330, 48933408034 ],\n \"samples_ts\": [ 10.4708, 10.4681, 10.4632 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T06:19:41Z", + "avg_ns": 11518750574, + "stddev_ns": 2457517, + "avg_ts": 44.449267, + "stddev_ts": 0.009465, + "samples_ns": [ + 11518543226, + 11521300493, + 11516408005 + ], + "samples_ts": [ + 44.4501, + 44.4394, + 44.4583 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T06:20:27Z", + "avg_ns": 48913943995, + "stddev_ns": 18014583, + "avg_ts": 10.467364, + "stddev_ts": 0.003854, + "samples_ns": [ + 48897860622, + 48910563330, + 48933408034 + ], + "samples_ts": [ + 10.4708, + 10.4681, + 10.4632 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 567 + }, + { + "timestamp_utc": "2025-12-09T06:23:43.685742+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:22:55Z\",\n \"avg_ns\": 2877884219,\n \"stddev_ns\": 1972605,\n \"avg_ts\": 44.477133,\n \"stddev_ts\": 0.030474,\n \"samples_ns\": [ 2880160227, 2876823771, 2876668659 ],\n \"samples_ts\": [ 44.442, 44.4935, 44.4959 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:23:07Z\",\n \"avg_ns\": 12127546053,\n \"stddev_ns\": 39457355,\n \"avg_ts\": 10.554559,\n \"stddev_ts\": 0.034380,\n \"samples_ns\": [ 12083993807, 12137734590, 12160909763 ],\n \"samples_ts\": [ 10.5925, 10.5456, 10.5255 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T06:22:55Z", + "avg_ns": 2877884219, + "stddev_ns": 1972605, + "avg_ts": 44.477133, + "stddev_ts": 0.030474, + "samples_ns": [ + 2880160227, + 2876823771, + 2876668659 + ], + "samples_ts": [ + 44.442, + 44.4935, + 44.4959 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T06:23:07Z", + "avg_ns": 12127546053, + "stddev_ns": 39457355, + "avg_ts": 10.554559, + "stddev_ts": 0.03438, + "samples_ns": [ + 12083993807, + 12137734590, + 12160909763 + ], + "samples_ts": [ + 10.5925, + 10.5456, + 10.5255 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 568 + }, + { + "timestamp_utc": "2025-12-09T06:26:23.160413+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:23:44Z\",\n \"avg_ns\": 2877067095,\n \"stddev_ns\": 1865936,\n \"avg_ts\": 44.489764,\n \"stddev_ts\": 0.028839,\n \"samples_ns\": [ 2877696503, 2878535325, 2874969459 ],\n \"samples_ts\": [ 44.48, 44.4671, 44.5222 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:23:56Z\",\n \"avg_ns\": 48944431215,\n \"stddev_ns\": 73192271,\n \"avg_ts\": 10.460858,\n \"stddev_ts\": 0.015640,\n \"samples_ns\": [ 48937289195, 49020932017, 48875072435 ],\n \"samples_ts\": [ 10.4624, 10.4445, 10.4757 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T06:23:44Z", + "avg_ns": 2877067095, + "stddev_ns": 1865936, + "avg_ts": 44.489764, + "stddev_ts": 0.028839, + "samples_ns": [ + 2877696503, + 2878535325, + 2874969459 + ], + "samples_ts": [ + 44.48, + 44.4671, + 44.5222 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T06:23:56Z", + "avg_ns": 48944431215, + "stddev_ns": 73192271, + "avg_ts": 10.460858, + "stddev_ts": 0.01564, + "samples_ns": [ + 48937289195, + 49020932017, + 48875072435 + ], + "samples_ts": [ + 10.4624, + 10.4445, + 10.4757 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 569 + }, + { + "timestamp_utc": "2025-12-09T06:27:47.155605+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:26:24Z\",\n \"avg_ns\": 11585624040,\n \"stddev_ns\": 8059680,\n \"avg_ts\": 44.192714,\n \"stddev_ts\": 0.030733,\n \"samples_ns\": [ 11582512462, 11594775684, 11579583974 ],\n \"samples_ts\": [ 44.2046, 44.1578, 44.2158 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:27:10Z\",\n \"avg_ns\": 12188035953,\n \"stddev_ns\": 42954239,\n \"avg_ts\": 10.502189,\n \"stddev_ts\": 0.037087,\n \"samples_ns\": [ 12138525333, 12215356101, 12210226426 ],\n \"samples_ts\": [ 10.5449, 10.4786, 10.483 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T06:26:24Z", + "avg_ns": 11585624040, + "stddev_ns": 8059680, + "avg_ts": 44.192714, + "stddev_ts": 0.030733, + "samples_ns": [ + 11582512462, + 11594775684, + 11579583974 + ], + "samples_ts": [ + 44.2046, + 44.1578, + 44.2158 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T06:27:10Z", + "avg_ns": 12188035953, + "stddev_ns": 42954239, + "avg_ts": 10.502189, + "stddev_ts": 0.037087, + "samples_ns": [ + 12138525333, + 12215356101, + 12210226426 + ], + "samples_ts": [ + 10.5449, + 10.4786, + 10.483 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 570 + }, + { + "timestamp_utc": "2025-12-09T06:31:01.458980+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:27:48Z\",\n \"avg_ns\": 11581577316,\n \"stddev_ns\": 4383068,\n \"avg_ts\": 44.208145,\n \"stddev_ts\": 0.016732,\n \"samples_ns\": [ 11585464140, 11582441170, 11576826638 ],\n \"samples_ts\": [ 44.1933, 44.2048, 44.2263 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:28:34Z\",\n \"avg_ns\": 48958937627,\n \"stddev_ns\": 33614714,\n \"avg_ts\": 10.457747,\n \"stddev_ts\": 0.007181,\n \"samples_ns\": [ 48922202801, 48966453060, 48988157022 ],\n \"samples_ts\": [ 10.4656, 10.4561, 10.4515 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T06:27:48Z", + "avg_ns": 11581577316, + "stddev_ns": 4383068, + "avg_ts": 44.208145, + "stddev_ts": 0.016732, + "samples_ns": [ + 11585464140, + 11582441170, + 11576826638 + ], + "samples_ts": [ + 44.1933, + 44.2048, + 44.2263 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T06:28:34Z", + "avg_ns": 48958937627, + "stddev_ns": 33614714, + "avg_ts": 10.457747, + "stddev_ts": 0.007181, + "samples_ns": [ + 48922202801, + 48966453060, + 48988157022 + ], + "samples_ts": [ + 10.4656, + 10.4561, + 10.4515 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 571 + }, + { + "timestamp_utc": "2025-12-09T06:31:50.720109+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:31:02Z\",\n \"avg_ns\": 2877064526,\n \"stddev_ns\": 1018885,\n \"avg_ts\": 44.489795,\n \"stddev_ts\": 0.015732,\n \"samples_ns\": [ 2878174845, 2876176734, 2876842000 ],\n \"samples_ts\": [ 44.4726, 44.5035, 44.4932 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:31:13Z\",\n \"avg_ns\": 12193073767,\n \"stddev_ns\": 33126168,\n \"avg_ts\": 10.497815,\n \"stddev_ts\": 0.028565,\n \"samples_ns\": [ 12154828193, 12212744365, 12211648743 ],\n \"samples_ts\": [ 10.5308, 10.4809, 10.4818 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T06:31:02Z", + "avg_ns": 2877064526, + "stddev_ns": 1018885, + "avg_ts": 44.489795, + "stddev_ts": 0.015732, + "samples_ns": [ + 2878174845, + 2876176734, + 2876842000 + ], + "samples_ts": [ + 44.4726, + 44.5035, + 44.4932 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T06:31:13Z", + "avg_ns": 12193073767, + "stddev_ns": 33126168, + "avg_ts": 10.497815, + "stddev_ts": 0.028565, + "samples_ns": [ + 12154828193, + 12212744365, + 12211648743 + ], + "samples_ts": [ + 10.5308, + 10.4809, + 10.4818 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 572 + }, + { + "timestamp_utc": "2025-12-09T06:34:30.276082+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:31:51Z\",\n \"avg_ns\": 2882287369,\n \"stddev_ns\": 1007724,\n \"avg_ts\": 44.409177,\n \"stddev_ts\": 0.015483,\n \"samples_ns\": [ 2881233706, 2882393383, 2883235020 ],\n \"samples_ts\": [ 44.4254, 44.4075, 44.3946 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:32:03Z\",\n \"avg_ns\": 48967724138,\n \"stddev_ns\": 76337120,\n \"avg_ts\": 10.455884,\n \"stddev_ts\": 0.016303,\n \"samples_ns\": [ 48974661839, 49040354955, 48888155622 ],\n \"samples_ts\": [ 10.4544, 10.4404, 10.4729 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T06:31:51Z", + "avg_ns": 2882287369, + "stddev_ns": 1007724, + "avg_ts": 44.409177, + "stddev_ts": 0.015483, + "samples_ns": [ + 2881233706, + 2882393383, + 2883235020 + ], + "samples_ts": [ + 44.4254, + 44.4075, + 44.3946 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T06:32:03Z", + "avg_ns": 48967724138, + "stddev_ns": 76337120, + "avg_ts": 10.455884, + "stddev_ts": 0.016303, + "samples_ns": [ + 48974661839, + 49040354955, + 48888155622 + ], + "samples_ts": [ + 10.4544, + 10.4404, + 10.4729 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 573 + }, + { + "timestamp_utc": "2025-12-09T06:35:54.952307+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:34:31Z\",\n \"avg_ns\": 11792870999,\n \"stddev_ns\": 1904848,\n \"avg_ts\": 43.416061,\n \"stddev_ts\": 0.007001,\n \"samples_ns\": [ 11794870542, 11791085082, 11792657374 ],\n \"samples_ts\": [ 43.4087, 43.4226, 43.4168 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:35:18Z\",\n \"avg_ns\": 12125621009,\n \"stddev_ns\": 26930761,\n \"avg_ts\": 10.556195,\n \"stddev_ts\": 0.023464,\n \"samples_ns\": [ 12095884963, 12132609980, 12148368085 ],\n \"samples_ts\": [ 10.5821, 10.5501, 10.5364 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T06:34:31Z", + "avg_ns": 11792870999, + "stddev_ns": 1904848, + "avg_ts": 43.416061, + "stddev_ts": 0.007001, + "samples_ns": [ + 11794870542, + 11791085082, + 11792657374 + ], + "samples_ts": [ + 43.4087, + 43.4226, + 43.4168 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T06:35:18Z", + "avg_ns": 12125621009, + "stddev_ns": 26930761, + "avg_ts": 10.556195, + "stddev_ts": 0.023464, + "samples_ns": [ + 12095884963, + 12132609980, + 12148368085 + ], + "samples_ts": [ + 10.5821, + 10.5501, + 10.5364 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 574 + }, + { + "timestamp_utc": "2025-12-09T06:39:10.229957+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:35:55Z\",\n \"avg_ns\": 11768598565,\n \"stddev_ns\": 2800350,\n \"avg_ts\": 43.505607,\n \"stddev_ts\": 0.010336,\n \"samples_ns\": [ 11771513221, 11765938320, 11768344156 ],\n \"samples_ts\": [ 43.4948, 43.5154, 43.5065 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 1B Q4_K - Medium\",\n \"model_size\": 799525120,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:36:43Z\",\n \"avg_ns\": 49014013425,\n \"stddev_ns\": 96888258,\n \"avg_ts\": 10.446019,\n \"stddev_ts\": 0.020668,\n \"samples_ns\": [ 49088837984, 49048630652, 48904571640 ],\n \"samples_ts\": [ 10.4301, 10.4386, 10.4694 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T06:35:55Z", + "avg_ns": 11768598565, + "stddev_ns": 2800350, + "avg_ts": 43.505607, + "stddev_ts": 0.010336, + "samples_ns": [ + 11771513221, + 11765938320, + 11768344156 + ], + "samples_ts": [ + 43.4948, + 43.5154, + 43.5065 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_type": "gemma3 1B Q4_K - Medium", + "model_size": 799525120, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T06:36:43Z", + "avg_ns": 49014013425, + "stddev_ns": 96888258, + "avg_ts": 10.446019, + "stddev_ts": 0.020668, + "samples_ns": [ + 49088837984, + 49048630652, + 48904571640 + ], + "samples_ts": [ + 10.4301, + 10.4386, + 10.4694 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 575 + }, + { + "timestamp_utc": "2025-12-09T06:40:33.741705+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:39:29Z\",\n \"avg_ns\": 6394042724,\n \"stddev_ns\": 13442255,\n \"avg_ts\": 20.018693,\n \"stddev_ts\": 0.042035,\n \"samples_ns\": [ 6386108381, 6386456589, 6409563202 ],\n \"samples_ts\": [ 20.0435, 20.0424, 19.9702 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:39:55Z\",\n \"avg_ns\": 12799794660,\n \"stddev_ns\": 19627462,\n \"avg_ts\": 10.000176,\n \"stddev_ts\": 0.015321,\n \"samples_ns\": [ 12822410297, 12787207385, 12789766298 ],\n \"samples_ts\": [ 9.98252, 10.01, 10.008 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T06:39:29Z", + "avg_ns": 6394042724, + "stddev_ns": 13442255, + "avg_ts": 20.018693, + "stddev_ts": 0.042035, + "samples_ns": [ + 6386108381, + 6386456589, + 6409563202 + ], + "samples_ts": [ + 20.0435, + 20.0424, + 19.9702 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T06:39:55Z", + "avg_ns": 12799794660, + "stddev_ns": 19627462, + "avg_ts": 10.000176, + "stddev_ts": 0.015321, + "samples_ns": [ + 12822410297, + 12787207385, + 12789766298 + ], + "samples_ts": [ + 9.98252, + 10.01, + 10.008 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 576 + }, + { + "timestamp_utc": "2025-12-09T06:43:33.478989+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:40:34Z\",\n \"avg_ns\": 6373681013,\n \"stddev_ns\": 362605,\n \"avg_ts\": 20.082586,\n \"stddev_ts\": 0.001115,\n \"samples_ns\": [ 6373753151, 6373296798, 6373993091 ],\n \"samples_ts\": [ 20.0824, 20.0838, 20.0816 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:41:00Z\",\n \"avg_ns\": 51023041729,\n \"stddev_ns\": 2480918,\n \"avg_ts\": 10.034682,\n \"stddev_ts\": 0.000484,\n \"samples_ns\": [ 51025880408, 51021525758, 51021719023 ],\n \"samples_ts\": [ 10.0341, 10.035, 10.0349 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T06:40:34Z", + "avg_ns": 6373681013, + "stddev_ns": 362605, + "avg_ts": 20.082586, + "stddev_ts": 0.001115, + "samples_ns": [ + 6373753151, + 6373296798, + 6373993091 + ], + "samples_ts": [ + 20.0824, + 20.0838, + 20.0816 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T06:41:00Z", + "avg_ns": 51023041729, + "stddev_ns": 2480918, + "avg_ts": 10.034682, + "stddev_ts": 0.000484, + "samples_ns": [ + 51025880408, + 51021525758, + 51021719023 + ], + "samples_ts": [ + 10.0341, + 10.035, + 10.0349 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 577 + }, + { + "timestamp_utc": "2025-12-09T06:45:55.797879+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:43:34Z\",\n \"avg_ns\": 25721361217,\n \"stddev_ns\": 1163905,\n \"avg_ts\": 19.905634,\n \"stddev_ts\": 0.000901,\n \"samples_ns\": [ 25722575401, 25720255132, 25721253118 ],\n \"samples_ts\": [ 19.9047, 19.9065, 19.9057 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:45:17Z\",\n \"avg_ns\": 12767882918,\n \"stddev_ns\": 4411754,\n \"avg_ts\": 10.025155,\n \"stddev_ts\": 0.003461,\n \"samples_ns\": [ 12772647713, 12767053130, 12763947913 ],\n \"samples_ts\": [ 10.0214, 10.0258, 10.0282 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T06:43:34Z", + "avg_ns": 25721361217, + "stddev_ns": 1163905, + "avg_ts": 19.905634, + "stddev_ts": 0.000901, + "samples_ns": [ + 25722575401, + 25720255132, + 25721253118 + ], + "samples_ts": [ + 19.9047, + 19.9065, + 19.9057 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T06:45:17Z", + "avg_ns": 12767882918, + "stddev_ns": 4411754, + "avg_ts": 10.025155, + "stddev_ts": 0.003461, + "samples_ns": [ + 12772647713, + 12767053130, + 12763947913 + ], + "samples_ts": [ + 10.0214, + 10.0258, + 10.0282 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 578 + }, + { + "timestamp_utc": "2025-12-09T06:50:14.578802+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:45:56Z\",\n \"avg_ns\": 25719331392,\n \"stddev_ns\": 423839,\n \"avg_ts\": 19.907205,\n \"stddev_ts\": 0.000304,\n \"samples_ns\": [ 25719678247, 25719410332, 25718905598 ],\n \"samples_ts\": [ 19.9069, 19.9071, 19.9075 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:47:39Z\",\n \"avg_ns\": 51590153543,\n \"stddev_ns\": 3232020,\n \"avg_ts\": 9.924374,\n \"stddev_ts\": 0.000620,\n \"samples_ns\": [ 51587430122, 51589317164, 51593713344 ],\n \"samples_ts\": [ 9.9249, 9.92454, 9.92369 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T06:45:56Z", + "avg_ns": 25719331392, + "stddev_ns": 423839, + "avg_ts": 19.907205, + "stddev_ts": 0.000304, + "samples_ns": [ + 25719678247, + 25719410332, + 25718905598 + ], + "samples_ts": [ + 19.9069, + 19.9071, + 19.9075 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T06:47:39Z", + "avg_ns": 51590153543, + "stddev_ns": 3232020, + "avg_ts": 9.924374, + "stddev_ts": 0.00062, + "samples_ns": [ + 51587430122, + 51589317164, + 51593713344 + ], + "samples_ts": [ + 9.9249, + 9.92454, + 9.92369 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 579 + }, + { + "timestamp_utc": "2025-12-09T06:51:19.416465+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:50:15Z\",\n \"avg_ns\": 6377017544,\n \"stddev_ns\": 255656,\n \"avg_ts\": 20.072079,\n \"stddev_ts\": 0.000805,\n \"samples_ns\": [ 6377247439, 6377062976, 6376742217 ],\n \"samples_ts\": [ 20.0714, 20.0719, 20.0729 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:50:41Z\",\n \"avg_ns\": 12714177137,\n \"stddev_ns\": 912764,\n \"avg_ts\": 10.067502,\n \"stddev_ts\": 0.000723,\n \"samples_ns\": [ 12714040163, 12715150648, 12713340600 ],\n \"samples_ts\": [ 10.0676, 10.0667, 10.0682 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T06:50:15Z", + "avg_ns": 6377017544, + "stddev_ns": 255656, + "avg_ts": 20.072079, + "stddev_ts": 0.000805, + "samples_ns": [ + 6377247439, + 6377062976, + 6376742217 + ], + "samples_ts": [ + 20.0714, + 20.0719, + 20.0729 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T06:50:41Z", + "avg_ns": 12714177137, + "stddev_ns": 912764, + "avg_ts": 10.067502, + "stddev_ts": 0.000723, + "samples_ns": [ + 12714040163, + 12715150648, + 12713340600 + ], + "samples_ts": [ + 10.0676, + 10.0667, + 10.0682 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 580 + }, + { + "timestamp_utc": "2025-12-09T06:54:19.561226+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:51:20Z\",\n \"avg_ns\": 6377243380,\n \"stddev_ns\": 139480,\n \"avg_ts\": 20.071368,\n \"stddev_ts\": 0.000258,\n \"samples_ns\": [ 6377169980, 6377331652, 6377228510 ],\n \"samples_ts\": [ 20.0716, 20.0711, 20.0714 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:51:45Z\",\n \"avg_ns\": 51183579927,\n \"stddev_ns\": 847628,\n \"avg_ts\": 10.003208,\n \"stddev_ts\": 0.000153,\n \"samples_ns\": [ 51182760615, 51184325220, 51183653948 ],\n \"samples_ts\": [ 10.0034, 10.0031, 10.0032 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T06:51:20Z", + "avg_ns": 6377243380, + "stddev_ns": 139480, + "avg_ts": 20.071368, + "stddev_ts": 0.000258, + "samples_ns": [ + 6377169980, + 6377331652, + 6377228510 + ], + "samples_ts": [ + 20.0716, + 20.0711, + 20.0714 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T06:51:45Z", + "avg_ns": 51183579927, + "stddev_ns": 847628, + "avg_ts": 10.003208, + "stddev_ts": 0.000153, + "samples_ns": [ + 51182760615, + 51184325220, + 51183653948 + ], + "samples_ts": [ + 10.0034, + 10.0031, + 10.0032 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 581 + }, + { + "timestamp_utc": "2025-12-09T06:56:41.237532+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:54:20Z\",\n \"avg_ns\": 25654252160,\n \"stddev_ns\": 6960154,\n \"avg_ts\": 19.957706,\n \"stddev_ts\": 0.005414,\n \"samples_ns\": [ 25662222574, 25651160330, 25649373576 ],\n \"samples_ts\": [ 19.9515, 19.9601, 19.9615 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:56:03Z\",\n \"avg_ns\": 12659836215,\n \"stddev_ns\": 2875563,\n \"avg_ts\": 10.110716,\n \"stddev_ts\": 0.002293,\n \"samples_ns\": [ 12660772553, 12662122316, 12656613778 ],\n \"samples_ts\": [ 10.11, 10.1089, 10.1133 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T06:54:20Z", + "avg_ns": 25654252160, + "stddev_ns": 6960154, + "avg_ts": 19.957706, + "stddev_ts": 0.005414, + "samples_ns": [ + 25662222574, + 25651160330, + 25649373576 + ], + "samples_ts": [ + 19.9515, + 19.9601, + 19.9615 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T06:56:03Z", + "avg_ns": 12659836215, + "stddev_ns": 2875563, + "avg_ts": 10.110716, + "stddev_ts": 0.002293, + "samples_ns": [ + 12660772553, + 12662122316, + 12656613778 + ], + "samples_ts": [ + 10.11, + 10.1089, + 10.1133 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 582 + }, + { + "timestamp_utc": "2025-12-09T07:00:57.886307+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:56:42Z\",\n \"avg_ns\": 25634100719,\n \"stddev_ns\": 207425,\n \"avg_ts\": 19.973394,\n \"stddev_ts\": 0.000162,\n \"samples_ns\": [ 25634314987, 25634086280, 25633900890 ],\n \"samples_ts\": [ 19.9732, 19.9734, 19.9735 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T06:58:24Z\",\n \"avg_ns\": 51009786335,\n \"stddev_ns\": 377269,\n \"avg_ts\": 10.037290,\n \"stddev_ts\": 0.000059,\n \"samples_ns\": [ 51009978177, 51009437986, 51009942843 ],\n \"samples_ts\": [ 10.0373, 10.0374, 10.0373 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T06:56:42Z", + "avg_ns": 25634100719, + "stddev_ns": 207425, + "avg_ts": 19.973394, + "stddev_ts": 0.000162, + "samples_ns": [ + 25634314987, + 25634086280, + 25633900890 + ], + "samples_ts": [ + 19.9732, + 19.9734, + 19.9735 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T06:58:24Z", + "avg_ns": 51009786335, + "stddev_ns": 377269, + "avg_ts": 10.03729, + "stddev_ts": 5.9e-05, + "samples_ns": [ + 51009978177, + 51009437986, + 51009942843 + ], + "samples_ts": [ + 10.0373, + 10.0374, + 10.0373 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 583 + }, + { + "timestamp_utc": "2025-12-09T07:02:02.588205+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:00:58Z\",\n \"avg_ns\": 6376473432,\n \"stddev_ns\": 167375,\n \"avg_ts\": 20.073792,\n \"stddev_ts\": 0.000463,\n \"samples_ns\": [ 6376512977, 6376310603, 6376596717 ],\n \"samples_ts\": [ 20.0737, 20.0743, 20.0734 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:01:24Z\",\n \"avg_ns\": 12697303110,\n \"stddev_ns\": 905933,\n \"avg_ts\": 10.080881,\n \"stddev_ts\": 0.000708,\n \"samples_ns\": [ 12696972219, 12696624043, 12698313070 ],\n \"samples_ts\": [ 10.0811, 10.0814, 10.0801 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T07:00:58Z", + "avg_ns": 6376473432, + "stddev_ns": 167375, + "avg_ts": 20.073792, + "stddev_ts": 0.000463, + "samples_ns": [ + 6376512977, + 6376310603, + 6376596717 + ], + "samples_ts": [ + 20.0737, + 20.0743, + 20.0734 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T07:01:24Z", + "avg_ns": 12697303110, + "stddev_ns": 905933, + "avg_ts": 10.080881, + "stddev_ts": 0.000708, + "samples_ns": [ + 12696972219, + 12696624043, + 12698313070 + ], + "samples_ts": [ + 10.0811, + 10.0814, + 10.0801 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 584 + }, + { + "timestamp_utc": "2025-12-09T07:05:02.792579+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:02:03Z\",\n \"avg_ns\": 6378564733,\n \"stddev_ns\": 293449,\n \"avg_ts\": 20.067210,\n \"stddev_ts\": 0.000852,\n \"samples_ns\": [ 6378876882, 6378391978, 6378425341 ],\n \"samples_ts\": [ 20.0662, 20.0678, 20.0676 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:02:28Z\",\n \"avg_ns\": 51205073208,\n \"stddev_ns\": 1205304,\n \"avg_ts\": 9.999009,\n \"stddev_ts\": 0.000235,\n \"samples_ns\": [ 51204168413, 51204609762, 51206441449 ],\n \"samples_ts\": [ 9.99919, 9.9991, 9.99874 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T07:02:03Z", + "avg_ns": 6378564733, + "stddev_ns": 293449, + "avg_ts": 20.06721, + "stddev_ts": 0.000852, + "samples_ns": [ + 6378876882, + 6378391978, + 6378425341 + ], + "samples_ts": [ + 20.0662, + 20.0678, + 20.0676 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T07:02:28Z", + "avg_ns": 51205073208, + "stddev_ns": 1205304, + "avg_ts": 9.999009, + "stddev_ts": 0.000235, + "samples_ns": [ + 51204168413, + 51204609762, + 51206441449 + ], + "samples_ts": [ + 9.99919, + 9.9991, + 9.99874 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 585 + }, + { + "timestamp_utc": "2025-12-09T07:07:27.671172+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:05:03Z\",\n \"avg_ns\": 26446192587,\n \"stddev_ns\": 1438870,\n \"avg_ts\": 19.360065,\n \"stddev_ts\": 0.001040,\n \"samples_ns\": [ 26447024926, 26447000292, 26444552545 ],\n \"samples_ts\": [ 19.3595, 19.3595, 19.3613 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:06:49Z\",\n \"avg_ns\": 12663761026,\n \"stddev_ns\": 3786006,\n \"avg_ts\": 10.107582,\n \"stddev_ts\": 0.003020,\n \"samples_ns\": [ 12660041857, 12667607320, 12663633902 ],\n \"samples_ts\": [ 10.1106, 10.1045, 10.1077 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T07:05:03Z", + "avg_ns": 26446192587, + "stddev_ns": 1438870, + "avg_ts": 19.360065, + "stddev_ts": 0.00104, + "samples_ns": [ + 26447024926, + 26447000292, + 26444552545 + ], + "samples_ts": [ + 19.3595, + 19.3595, + 19.3613 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T07:06:49Z", + "avg_ns": 12663761026, + "stddev_ns": 3786006, + "avg_ts": 10.107582, + "stddev_ts": 0.00302, + "samples_ns": [ + 12660041857, + 12667607320, + 12663633902 + ], + "samples_ts": [ + 10.1106, + 10.1045, + 10.1077 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 586 + }, + { + "timestamp_utc": "2025-12-09T07:11:47.684819+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:07:28Z\",\n \"avg_ns\": 26456345675,\n \"stddev_ns\": 2502525,\n \"avg_ts\": 19.352635,\n \"stddev_ts\": 0.001831,\n \"samples_ns\": [ 26453612033, 26458523691, 26456901301 ],\n \"samples_ts\": [ 19.3546, 19.351, 19.3522 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:09:14Z\",\n \"avg_ns\": 51033249113,\n \"stddev_ns\": 600559,\n \"avg_ts\": 10.032675,\n \"stddev_ts\": 0.000118,\n \"samples_ns\": [ 51033694309, 51032566056, 51033486974 ],\n \"samples_ts\": [ 10.0326, 10.0328, 10.0326 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T07:07:28Z", + "avg_ns": 26456345675, + "stddev_ns": 2502525, + "avg_ts": 19.352635, + "stddev_ts": 0.001831, + "samples_ns": [ + 26453612033, + 26458523691, + 26456901301 + ], + "samples_ts": [ + 19.3546, + 19.351, + 19.3522 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T07:09:14Z", + "avg_ns": 51033249113, + "stddev_ns": 600559, + "avg_ts": 10.032675, + "stddev_ts": 0.000118, + "samples_ns": [ + 51033694309, + 51032566056, + 51033486974 + ], + "samples_ts": [ + 10.0326, + 10.0328, + 10.0326 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 587 + }, + { + "timestamp_utc": "2025-12-09T07:12:52.196105+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:11:48Z\",\n \"avg_ns\": 6376922315,\n \"stddev_ns\": 367019,\n \"avg_ts\": 20.072379,\n \"stddev_ts\": 0.001155,\n \"samples_ns\": [ 6376675375, 6377344061, 6376747509 ],\n \"samples_ts\": [ 20.0732, 20.0711, 20.0729 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:12:14Z\",\n \"avg_ns\": 12632202516,\n \"stddev_ns\": 1577512,\n \"avg_ts\": 10.132833,\n \"stddev_ts\": 0.001265,\n \"samples_ns\": [ 12633982494, 12631647644, 12630977410 ],\n \"samples_ts\": [ 10.1314, 10.1333, 10.1338 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T07:11:48Z", + "avg_ns": 6376922315, + "stddev_ns": 367019, + "avg_ts": 20.072379, + "stddev_ts": 0.001155, + "samples_ns": [ + 6376675375, + 6377344061, + 6376747509 + ], + "samples_ts": [ + 20.0732, + 20.0711, + 20.0729 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T07:12:14Z", + "avg_ns": 12632202516, + "stddev_ns": 1577512, + "avg_ts": 10.132833, + "stddev_ts": 0.001265, + "samples_ns": [ + 12633982494, + 12631647644, + 12630977410 + ], + "samples_ts": [ + 10.1314, + 10.1333, + 10.1338 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 588 + }, + { + "timestamp_utc": "2025-12-09T07:15:52.041789+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:12:53Z\",\n \"avg_ns\": 6376231026,\n \"stddev_ns\": 301166,\n \"avg_ts\": 20.074555,\n \"stddev_ts\": 0.000914,\n \"samples_ns\": [ 6375897762, 6376429689, 6376365628 ],\n \"samples_ts\": [ 20.0756, 20.0739, 20.0741 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:13:18Z\",\n \"avg_ns\": 51081109098,\n \"stddev_ns\": 6497895,\n \"avg_ts\": 10.023275,\n \"stddev_ts\": 0.001274,\n \"samples_ns\": [ 51074385242, 51081605229, 51087336825 ],\n \"samples_ts\": [ 10.0246, 10.0232, 10.0221 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T07:12:53Z", + "avg_ns": 6376231026, + "stddev_ns": 301166, + "avg_ts": 20.074555, + "stddev_ts": 0.000914, + "samples_ns": [ + 6375897762, + 6376429689, + 6376365628 + ], + "samples_ts": [ + 20.0756, + 20.0739, + 20.0741 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T07:13:18Z", + "avg_ns": 51081109098, + "stddev_ns": 6497895, + "avg_ts": 10.023275, + "stddev_ts": 0.001274, + "samples_ns": [ + 51074385242, + 51081605229, + 51087336825 + ], + "samples_ts": [ + 10.0246, + 10.0232, + 10.0221 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 589 + }, + { + "timestamp_utc": "2025-12-09T07:18:13.813561+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:15:52Z\",\n \"avg_ns\": 25725391649,\n \"stddev_ns\": 695710,\n \"avg_ts\": 19.902515,\n \"stddev_ts\": 0.000538,\n \"samples_ns\": [ 25724627982, 25725557561, 25725989404 ],\n \"samples_ts\": [ 19.9031, 19.9024, 19.9021 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:17:35Z\",\n \"avg_ns\": 12591974344,\n \"stddev_ns\": 964244,\n \"avg_ts\": 10.165205,\n \"stddev_ts\": 0.000778,\n \"samples_ns\": [ 12593075403, 12591280570, 12591567059 ],\n \"samples_ts\": [ 10.1643, 10.1658, 10.1655 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T07:15:52Z", + "avg_ns": 25725391649, + "stddev_ns": 695710, + "avg_ts": 19.902515, + "stddev_ts": 0.000538, + "samples_ns": [ + 25724627982, + 25725557561, + 25725989404 + ], + "samples_ts": [ + 19.9031, + 19.9024, + 19.9021 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T07:17:35Z", + "avg_ns": 12591974344, + "stddev_ns": 964244, + "avg_ts": 10.165205, + "stddev_ts": 0.000778, + "samples_ns": [ + 12593075403, + 12591280570, + 12591567059 + ], + "samples_ts": [ + 10.1643, + 10.1658, + 10.1655 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 590 + }, + { + "timestamp_utc": "2025-12-09T07:22:30.938240+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:18:14Z\",\n \"avg_ns\": 25721036284,\n \"stddev_ns\": 874838,\n \"avg_ts\": 19.905885,\n \"stddev_ts\": 0.000677,\n \"samples_ns\": [ 25721378442, 25721688332, 25720042078 ],\n \"samples_ts\": [ 19.9056, 19.9054, 19.9067 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:19:57Z\",\n \"avg_ns\": 51047023413,\n \"stddev_ns\": 3297800,\n \"avg_ts\": 10.029968,\n \"stddev_ts\": 0.000646,\n \"samples_ns\": [ 51043472537, 51047629278, 51049968425 ],\n \"samples_ts\": [ 10.0307, 10.0298, 10.0294 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T07:18:14Z", + "avg_ns": 25721036284, + "stddev_ns": 874838, + "avg_ts": 19.905885, + "stddev_ts": 0.000677, + "samples_ns": [ + 25721378442, + 25721688332, + 25720042078 + ], + "samples_ts": [ + 19.9056, + 19.9054, + 19.9067 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T07:19:57Z", + "avg_ns": 51047023413, + "stddev_ns": 3297800, + "avg_ts": 10.029968, + "stddev_ts": 0.000646, + "samples_ns": [ + 51043472537, + 51047629278, + 51049968425 + ], + "samples_ts": [ + 10.0307, + 10.0298, + 10.0294 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 591 + }, + { + "timestamp_utc": "2025-12-09T07:23:35.182732+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:22:31Z\",\n \"avg_ns\": 6377330322,\n \"stddev_ns\": 180851,\n \"avg_ts\": 20.071095,\n \"stddev_ts\": 0.000445,\n \"samples_ns\": [ 6377484950, 6377297958, 6377208060 ],\n \"samples_ts\": [ 20.0706, 20.0712, 20.0715 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:22:57Z\",\n \"avg_ns\": 12552446511,\n \"stddev_ns\": 237777,\n \"avg_ts\": 10.197215,\n \"stddev_ts\": 0.000170,\n \"samples_ns\": [ 12552404113, 12552674199, 12552261222 ],\n \"samples_ts\": [ 10.1972, 10.197, 10.1974 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T07:22:31Z", + "avg_ns": 6377330322, + "stddev_ns": 180851, + "avg_ts": 20.071095, + "stddev_ts": 0.000445, + "samples_ns": [ + 6377484950, + 6377297958, + 6377208060 + ], + "samples_ts": [ + 20.0706, + 20.0712, + 20.0715 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T07:22:57Z", + "avg_ns": 12552446511, + "stddev_ns": 237777, + "avg_ts": 10.197215, + "stddev_ts": 0.00017, + "samples_ns": [ + 12552404113, + 12552674199, + 12552261222 + ], + "samples_ts": [ + 10.1972, + 10.197, + 10.1974 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 592 + }, + { + "timestamp_utc": "2025-12-09T07:26:35.424994+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:23:36Z\",\n \"avg_ns\": 6374201843,\n \"stddev_ns\": 140706,\n \"avg_ts\": 20.080946,\n \"stddev_ts\": 0.000265,\n \"samples_ns\": [ 6374159633, 6374298537, 6374147361 ],\n \"samples_ts\": [ 20.0811, 20.0806, 20.0811 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:24:01Z\",\n \"avg_ns\": 51223523472,\n \"stddev_ns\": 2299936,\n \"avg_ts\": 9.995408,\n \"stddev_ts\": 0.000447,\n \"samples_ns\": [ 51223335189, 51221334656, 51225900572 ],\n \"samples_ts\": [ 9.99544, 9.99583, 9.99494 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T07:23:36Z", + "avg_ns": 6374201843, + "stddev_ns": 140706, + "avg_ts": 20.080946, + "stddev_ts": 0.000265, + "samples_ns": [ + 6374159633, + 6374298537, + 6374147361 + ], + "samples_ts": [ + 20.0811, + 20.0806, + 20.0811 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T07:24:01Z", + "avg_ns": 51223523472, + "stddev_ns": 2299936, + "avg_ts": 9.995408, + "stddev_ts": 0.000447, + "samples_ns": [ + 51223335189, + 51221334656, + 51225900572 + ], + "samples_ts": [ + 9.99544, + 9.99583, + 9.99494 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 593 + }, + { + "timestamp_utc": "2025-12-09T07:28:57.290958+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:26:36Z\",\n \"avg_ns\": 25656099006,\n \"stddev_ns\": 701674,\n \"avg_ts\": 19.956268,\n \"stddev_ts\": 0.000517,\n \"samples_ns\": [ 25656380548, 25656575962, 25655340510 ],\n \"samples_ts\": [ 19.956, 19.9559, 19.9569 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:28:18Z\",\n \"avg_ns\": 12711406389,\n \"stddev_ns\": 6201045,\n \"avg_ts\": 10.069698,\n \"stddev_ts\": 0.004910,\n \"samples_ns\": [ 12705722454, 12710480440, 12718016275 ],\n \"samples_ts\": [ 10.0742, 10.0704, 10.0645 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T07:26:36Z", + "avg_ns": 25656099006, + "stddev_ns": 701674, + "avg_ts": 19.956268, + "stddev_ts": 0.000517, + "samples_ns": [ + 25656380548, + 25656575962, + 25655340510 + ], + "samples_ts": [ + 19.956, + 19.9559, + 19.9569 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T07:28:18Z", + "avg_ns": 12711406389, + "stddev_ns": 6201045, + "avg_ts": 10.069698, + "stddev_ts": 0.00491, + "samples_ns": [ + 12705722454, + 12710480440, + 12718016275 + ], + "samples_ts": [ + 10.0742, + 10.0704, + 10.0645 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 594 + }, + { + "timestamp_utc": "2025-12-09T07:33:14.942489+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:28:58Z\",\n \"avg_ns\": 25647430548,\n \"stddev_ns\": 412114,\n \"avg_ts\": 19.963013,\n \"stddev_ts\": 0.000296,\n \"samples_ns\": [ 25647826618, 25647069611, 25647395416 ],\n \"samples_ts\": [ 19.9627, 19.9633, 19.963 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:30:40Z\",\n \"avg_ns\": 51323507704,\n \"stddev_ns\": 1056116,\n \"avg_ts\": 9.975935,\n \"stddev_ts\": 0.000196,\n \"samples_ns\": [ 51322348631, 51324158978, 51324015505 ],\n \"samples_ts\": [ 9.97616, 9.97581, 9.97584 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T07:28:58Z", + "avg_ns": 25647430548, + "stddev_ns": 412114, + "avg_ts": 19.963013, + "stddev_ts": 0.000296, + "samples_ns": [ + 25647826618, + 25647069611, + 25647395416 + ], + "samples_ts": [ + 19.9627, + 19.9633, + 19.963 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T07:30:40Z", + "avg_ns": 51323507704, + "stddev_ns": 1056116, + "avg_ts": 9.975935, + "stddev_ts": 0.000196, + "samples_ns": [ + 51322348631, + 51324158978, + 51324015505 + ], + "samples_ts": [ + 9.97616, + 9.97581, + 9.97584 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 595 + }, + { + "timestamp_utc": "2025-12-09T07:34:19.400000+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:33:15Z\",\n \"avg_ns\": 6375097310,\n \"stddev_ns\": 152505,\n \"avg_ts\": 20.078125,\n \"stddev_ts\": 0.000323,\n \"samples_ns\": [ 6375162653, 6374979168, 6375150111 ],\n \"samples_ts\": [ 20.0779, 20.0785, 20.078 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:33:41Z\",\n \"avg_ns\": 12624602148,\n \"stddev_ns\": 821341,\n \"avg_ts\": 10.138933,\n \"stddev_ts\": 0.000647,\n \"samples_ns\": [ 12623678035, 12624970060, 12625158351 ],\n \"samples_ts\": [ 10.1397, 10.1386, 10.1385 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T07:33:15Z", + "avg_ns": 6375097310, + "stddev_ns": 152505, + "avg_ts": 20.078125, + "stddev_ts": 0.000323, + "samples_ns": [ + 6375162653, + 6374979168, + 6375150111 + ], + "samples_ts": [ + 20.0779, + 20.0785, + 20.078 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T07:33:41Z", + "avg_ns": 12624602148, + "stddev_ns": 821341, + "avg_ts": 10.138933, + "stddev_ts": 0.000647, + "samples_ns": [ + 12623678035, + 12624970060, + 12625158351 + ], + "samples_ts": [ + 10.1397, + 10.1386, + 10.1385 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 596 + }, + { + "timestamp_utc": "2025-12-09T07:37:20.501128+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:34:20Z\",\n \"avg_ns\": 6376078481,\n \"stddev_ns\": 116081,\n \"avg_ts\": 20.075035,\n \"stddev_ts\": 0.000265,\n \"samples_ns\": [ 6376096552, 6376152235, 6375986657 ],\n \"samples_ts\": [ 20.075, 20.0748, 20.0753 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:34:45Z\",\n \"avg_ns\": 51502305232,\n \"stddev_ns\": 1790451,\n \"avg_ts\": 9.941303,\n \"stddev_ts\": 0.000340,\n \"samples_ns\": [ 51502901469, 51503691184, 51500323045 ],\n \"samples_ts\": [ 9.94119, 9.94104, 9.94169 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T07:34:20Z", + "avg_ns": 6376078481, + "stddev_ns": 116081, + "avg_ts": 20.075035, + "stddev_ts": 0.000265, + "samples_ns": [ + 6376096552, + 6376152235, + 6375986657 + ], + "samples_ts": [ + 20.075, + 20.0748, + 20.0753 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T07:34:45Z", + "avg_ns": 51502305232, + "stddev_ns": 1790451, + "avg_ts": 9.941303, + "stddev_ts": 0.00034, + "samples_ns": [ + 51502901469, + 51503691184, + 51500323045 + ], + "samples_ts": [ + 9.94119, + 9.94104, + 9.94169 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 597 + }, + { + "timestamp_utc": "2025-12-09T07:39:45.346649+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:37:21Z\",\n \"avg_ns\": 26452702165,\n \"stddev_ns\": 274400,\n \"avg_ts\": 19.355301,\n \"stddev_ts\": 0.000109,\n \"samples_ns\": [ 26452742934, 26452827191, 26452536372 ],\n \"samples_ts\": [ 19.3553, 19.3552, 19.3554 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:39:07Z\",\n \"avg_ns\": 12627969767,\n \"stddev_ns\": 577223,\n \"avg_ts\": 10.136230,\n \"stddev_ts\": 0.000454,\n \"samples_ns\": [ 12627354777, 12628085170, 12628469355 ],\n \"samples_ts\": [ 10.1367, 10.1361, 10.1358 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T07:37:21Z", + "avg_ns": 26452702165, + "stddev_ns": 274400, + "avg_ts": 19.355301, + "stddev_ts": 0.000109, + "samples_ns": [ + 26452742934, + 26452827191, + 26452536372 + ], + "samples_ts": [ + 19.3553, + 19.3552, + 19.3554 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T07:39:07Z", + "avg_ns": 12627969767, + "stddev_ns": 577223, + "avg_ts": 10.13623, + "stddev_ts": 0.000454, + "samples_ns": [ + 12627354777, + 12628085170, + 12628469355 + ], + "samples_ts": [ + 10.1367, + 10.1361, + 10.1358 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 598 + }, + { + "timestamp_utc": "2025-12-09T07:44:05.327384+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:39:46Z\",\n \"avg_ns\": 26466083870,\n \"stddev_ns\": 1943770,\n \"avg_ts\": 19.345514,\n \"stddev_ts\": 0.001421,\n \"samples_ns\": [ 26467606212, 26466751020, 26463894378 ],\n \"samples_ts\": [ 19.3444, 19.345, 19.3471 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:41:32Z\",\n \"avg_ns\": 51010426568,\n \"stddev_ns\": 1063059,\n \"avg_ts\": 10.037164,\n \"stddev_ts\": 0.000209,\n \"samples_ns\": [ 51011653864, 51009833016, 51009792824 ],\n \"samples_ts\": [ 10.0369, 10.0373, 10.0373 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T07:39:46Z", + "avg_ns": 26466083870, + "stddev_ns": 1943770, + "avg_ts": 19.345514, + "stddev_ts": 0.001421, + "samples_ns": [ + 26467606212, + 26466751020, + 26463894378 + ], + "samples_ts": [ + 19.3444, + 19.345, + 19.3471 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T07:41:32Z", + "avg_ns": 51010426568, + "stddev_ns": 1063059, + "avg_ts": 10.037164, + "stddev_ts": 0.000209, + "samples_ns": [ + 51011653864, + 51009833016, + 51009792824 + ], + "samples_ts": [ + 10.0369, + 10.0373, + 10.0373 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 599 + }, + { + "timestamp_utc": "2025-12-09T07:45:10.071378+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:44:06Z\",\n \"avg_ns\": 6377135138,\n \"stddev_ns\": 136923,\n \"avg_ts\": 20.071709,\n \"stddev_ts\": 0.000244,\n \"samples_ns\": [ 6377186461, 6377046088, 6377172867 ],\n \"samples_ts\": [ 20.0715, 20.072, 20.0716 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:44:31Z\",\n \"avg_ns\": 12719567122,\n \"stddev_ns\": 1636559,\n \"avg_ts\": 10.063236,\n \"stddev_ts\": 0.001292,\n \"samples_ns\": [ 12717766368, 12719984208, 12720950791 ],\n \"samples_ts\": [ 10.0647, 10.0629, 10.0621 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T07:44:06Z", + "avg_ns": 6377135138, + "stddev_ns": 136923, + "avg_ts": 20.071709, + "stddev_ts": 0.000244, + "samples_ns": [ + 6377186461, + 6377046088, + 6377172867 + ], + "samples_ts": [ + 20.0715, + 20.072, + 20.0716 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T07:44:31Z", + "avg_ns": 12719567122, + "stddev_ns": 1636559, + "avg_ts": 10.063236, + "stddev_ts": 0.001292, + "samples_ns": [ + 12717766368, + 12719984208, + 12720950791 + ], + "samples_ts": [ + 10.0647, + 10.0629, + 10.0621 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 600 + }, + { + "timestamp_utc": "2025-12-09T07:48:10.239191+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:45:10Z\",\n \"avg_ns\": 6376225585,\n \"stddev_ns\": 153440,\n \"avg_ts\": 20.074572,\n \"stddev_ts\": 0.000483,\n \"samples_ns\": [ 6376303130, 6376324777, 6376048848 ],\n \"samples_ts\": [ 20.0743, 20.0743, 20.0751 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:45:36Z\",\n \"avg_ns\": 51194900248,\n \"stddev_ns\": 787543,\n \"avg_ts\": 10.000996,\n \"stddev_ts\": 0.000147,\n \"samples_ns\": [ 51195750952, 51194312856, 51194636937 ],\n \"samples_ts\": [ 10.0008, 10.0011, 10.001 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T07:45:10Z", + "avg_ns": 6376225585, + "stddev_ns": 153440, + "avg_ts": 20.074572, + "stddev_ts": 0.000483, + "samples_ns": [ + 6376303130, + 6376324777, + 6376048848 + ], + "samples_ts": [ + 20.0743, + 20.0743, + 20.0751 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T07:45:36Z", + "avg_ns": 51194900248, + "stddev_ns": 787543, + "avg_ts": 10.000996, + "stddev_ts": 0.000147, + "samples_ns": [ + 51195750952, + 51194312856, + 51194636937 + ], + "samples_ts": [ + 10.0008, + 10.0011, + 10.001 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 601 + }, + { + "timestamp_utc": "2025-12-09T07:50:32.143887+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:48:11Z\",\n \"avg_ns\": 25731523250,\n \"stddev_ns\": 1147031,\n \"avg_ts\": 19.897773,\n \"stddev_ts\": 0.000869,\n \"samples_ns\": [ 25732603281, 25731607230, 25730359241 ],\n \"samples_ts\": [ 19.8969, 19.8977, 19.8987 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:49:54Z\",\n \"avg_ns\": 12615284339,\n \"stddev_ns\": 302938,\n \"avg_ts\": 10.146422,\n \"stddev_ts\": 0.000226,\n \"samples_ns\": [ 12615486580, 12615403402, 12614963036 ],\n \"samples_ts\": [ 10.1463, 10.1463, 10.1467 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T07:48:11Z", + "avg_ns": 25731523250, + "stddev_ns": 1147031, + "avg_ts": 19.897773, + "stddev_ts": 0.000869, + "samples_ns": [ + 25732603281, + 25731607230, + 25730359241 + ], + "samples_ts": [ + 19.8969, + 19.8977, + 19.8987 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T07:49:54Z", + "avg_ns": 12615284339, + "stddev_ns": 302938, + "avg_ts": 10.146422, + "stddev_ts": 0.000226, + "samples_ns": [ + 12615486580, + 12615403402, + 12614963036 + ], + "samples_ts": [ + 10.1463, + 10.1463, + 10.1467 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 602 + }, + { + "timestamp_utc": "2025-12-09T07:54:50.196105+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:50:33Z\",\n \"avg_ns\": 25704739624,\n \"stddev_ns\": 341935,\n \"avg_ts\": 19.918506,\n \"stddev_ts\": 0.000198,\n \"samples_ns\": [ 25705035168, 25704590907, 25704592799 ],\n \"samples_ts\": [ 19.9183, 19.9186, 19.9186 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:52:15Z\",\n \"avg_ns\": 51386428871,\n \"stddev_ns\": 4107642,\n \"avg_ts\": 9.963720,\n \"stddev_ts\": 0.000795,\n \"samples_ns\": [ 51383000069, 51385314169, 51390972376 ],\n \"samples_ts\": [ 9.96439, 9.96394, 9.96284 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T07:50:33Z", + "avg_ns": 25704739624, + "stddev_ns": 341935, + "avg_ts": 19.918506, + "stddev_ts": 0.000198, + "samples_ns": [ + 25705035168, + 25704590907, + 25704592799 + ], + "samples_ts": [ + 19.9183, + 19.9186, + 19.9186 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T07:52:15Z", + "avg_ns": 51386428871, + "stddev_ns": 4107642, + "avg_ts": 9.96372, + "stddev_ts": 0.000795, + "samples_ns": [ + 51383000069, + 51385314169, + 51390972376 + ], + "samples_ts": [ + 9.96439, + 9.96394, + 9.96284 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 603 + }, + { + "timestamp_utc": "2025-12-09T07:55:54.883650+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:54:51Z\",\n \"avg_ns\": 6376594556,\n \"stddev_ns\": 485595,\n \"avg_ts\": 20.073411,\n \"stddev_ts\": 0.001529,\n \"samples_ns\": [ 6376975490, 6376760417, 6376047761 ],\n \"samples_ts\": [ 20.0722, 20.0729, 20.0751 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:55:16Z\",\n \"avg_ns\": 12701645938,\n \"stddev_ns\": 3676712,\n \"avg_ts\": 10.077435,\n \"stddev_ts\": 0.002915,\n \"samples_ns\": [ 12705857488, 12699990329, 12699089998 ],\n \"samples_ts\": [ 10.0741, 10.0787, 10.0795 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T07:54:51Z", + "avg_ns": 6376594556, + "stddev_ns": 485595, + "avg_ts": 20.073411, + "stddev_ts": 0.001529, + "samples_ns": [ + 6376975490, + 6376760417, + 6376047761 + ], + "samples_ts": [ + 20.0722, + 20.0729, + 20.0751 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T07:55:16Z", + "avg_ns": 12701645938, + "stddev_ns": 3676712, + "avg_ts": 10.077435, + "stddev_ts": 0.002915, + "samples_ns": [ + 12705857488, + 12699990329, + 12699089998 + ], + "samples_ts": [ + 10.0741, + 10.0787, + 10.0795 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 604 + }, + { + "timestamp_utc": "2025-12-09T07:58:55.292634+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:55:55Z\",\n \"avg_ns\": 6376616629,\n \"stddev_ns\": 100093,\n \"avg_ts\": 20.073341,\n \"stddev_ts\": 0.000315,\n \"samples_ns\": [ 6376511246, 6376628217, 6376710424 ],\n \"samples_ts\": [ 20.0737, 20.0733, 20.073 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:56:21Z\",\n \"avg_ns\": 51270597229,\n \"stddev_ns\": 3137723,\n \"avg_ts\": 9.986230,\n \"stddev_ts\": 0.000611,\n \"samples_ns\": [ 51267903943, 51274042673, 51269845071 ],\n \"samples_ts\": [ 9.98676, 9.98556, 9.98638 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T07:55:55Z", + "avg_ns": 6376616629, + "stddev_ns": 100093, + "avg_ts": 20.073341, + "stddev_ts": 0.000315, + "samples_ns": [ + 6376511246, + 6376628217, + 6376710424 + ], + "samples_ts": [ + 20.0737, + 20.0733, + 20.073 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T07:56:21Z", + "avg_ns": 51270597229, + "stddev_ns": 3137723, + "avg_ts": 9.98623, + "stddev_ts": 0.000611, + "samples_ns": [ + 51267903943, + 51274042673, + 51269845071 + ], + "samples_ts": [ + 9.98676, + 9.98556, + 9.98638 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 605 + }, + { + "timestamp_utc": "2025-12-09T08:01:16.791493+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T07:58:56Z\",\n \"avg_ns\": 25649951229,\n \"stddev_ns\": 627001,\n \"avg_ts\": 19.961052,\n \"stddev_ts\": 0.000488,\n \"samples_ns\": [ 25649871862, 25650614135, 25649367690 ],\n \"samples_ts\": [ 19.9611, 19.9605, 19.9615 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:00:38Z\",\n \"avg_ns\": 12606877767,\n \"stddev_ns\": 881508,\n \"avg_ts\": 10.153188,\n \"stddev_ts\": 0.000698,\n \"samples_ns\": [ 12606943818, 12607709943, 12605979542 ],\n \"samples_ts\": [ 10.1531, 10.1525, 10.1539 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T07:58:56Z", + "avg_ns": 25649951229, + "stddev_ns": 627001, + "avg_ts": 19.961052, + "stddev_ts": 0.000488, + "samples_ns": [ + 25649871862, + 25650614135, + 25649367690 + ], + "samples_ts": [ + 19.9611, + 19.9605, + 19.9615 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T08:00:38Z", + "avg_ns": 12606877767, + "stddev_ns": 881508, + "avg_ts": 10.153188, + "stddev_ts": 0.000698, + "samples_ns": [ + 12606943818, + 12607709943, + 12605979542 + ], + "samples_ts": [ + 10.1531, + 10.1525, + 10.1539 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 606 + }, + { + "timestamp_utc": "2025-12-09T08:05:33.619019+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:01:17Z\",\n \"avg_ns\": 25639819267,\n \"stddev_ns\": 2033252,\n \"avg_ts\": 19.968940,\n \"stddev_ts\": 0.001574,\n \"samples_ns\": [ 25639486755, 25637985546, 25641985502 ],\n \"samples_ts\": [ 19.9692, 19.9704, 19.9673 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:03:00Z\",\n \"avg_ns\": 51047429391,\n \"stddev_ns\": 938772,\n \"avg_ts\": 10.029888,\n \"stddev_ts\": 0.000179,\n \"samples_ns\": [ 51047132503, 51046703685, 51048451986 ],\n \"samples_ts\": [ 10.0299, 10.03, 10.0297 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T08:01:17Z", + "avg_ns": 25639819267, + "stddev_ns": 2033252, + "avg_ts": 19.96894, + "stddev_ts": 0.001574, + "samples_ns": [ + 25639486755, + 25637985546, + 25641985502 + ], + "samples_ts": [ + 19.9692, + 19.9704, + 19.9673 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T08:03:00Z", + "avg_ns": 51047429391, + "stddev_ns": 938772, + "avg_ts": 10.029888, + "stddev_ts": 0.000179, + "samples_ns": [ + 51047132503, + 51046703685, + 51048451986 + ], + "samples_ts": [ + 10.0299, + 10.03, + 10.0297 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 607 + }, + { + "timestamp_utc": "2025-12-09T08:06:38.116846+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:05:34Z\",\n \"avg_ns\": 6375177771,\n \"stddev_ns\": 238622,\n \"avg_ts\": 20.077872,\n \"stddev_ts\": 0.000662,\n \"samples_ns\": [ 6375120661, 6375002013, 6375410641 ],\n \"samples_ts\": [ 20.0781, 20.0784, 20.0771 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:06:00Z\",\n \"avg_ns\": 12635129360,\n \"stddev_ns\": 1229392,\n \"avg_ts\": 10.130486,\n \"stddev_ts\": 0.000982,\n \"samples_ns\": [ 12635792239, 12635879223, 12633716619 ],\n \"samples_ts\": [ 10.13, 10.1299, 10.1316 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T08:05:34Z", + "avg_ns": 6375177771, + "stddev_ns": 238622, + "avg_ts": 20.077872, + "stddev_ts": 0.000662, + "samples_ns": [ + 6375120661, + 6375002013, + 6375410641 + ], + "samples_ts": [ + 20.0781, + 20.0784, + 20.0771 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T08:06:00Z", + "avg_ns": 12635129360, + "stddev_ns": 1229392, + "avg_ts": 10.130486, + "stddev_ts": 0.000982, + "samples_ns": [ + 12635792239, + 12635879223, + 12633716619 + ], + "samples_ts": [ + 10.13, + 10.1299, + 10.1316 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 608 + }, + { + "timestamp_utc": "2025-12-09T08:09:38.546044+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:06:39Z\",\n \"avg_ns\": 6377731866,\n \"stddev_ns\": 182161,\n \"avg_ts\": 20.069831,\n \"stddev_ts\": 0.000573,\n \"samples_ns\": [ 6377708738, 6377562373, 6377924487 ],\n \"samples_ts\": [ 20.0699, 20.0704, 20.0692 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:07:04Z\",\n \"avg_ns\": 51280561146,\n \"stddev_ns\": 887077,\n \"avg_ts\": 9.984290,\n \"stddev_ts\": 0.000167,\n \"samples_ns\": [ 51279721142, 51280526814, 51281435483 ],\n \"samples_ts\": [ 9.98445, 9.9843, 9.98412 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T08:06:39Z", + "avg_ns": 6377731866, + "stddev_ns": 182161, + "avg_ts": 20.069831, + "stddev_ts": 0.000573, + "samples_ns": [ + 6377708738, + 6377562373, + 6377924487 + ], + "samples_ts": [ + 20.0699, + 20.0704, + 20.0692 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T08:07:04Z", + "avg_ns": 51280561146, + "stddev_ns": 887077, + "avg_ts": 9.98429, + "stddev_ts": 0.000167, + "samples_ns": [ + 51279721142, + 51280526814, + 51281435483 + ], + "samples_ts": [ + 9.98445, + 9.9843, + 9.98412 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 609 + }, + { + "timestamp_utc": "2025-12-09T08:12:03.511659+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:09:39Z\",\n \"avg_ns\": 26464912112,\n \"stddev_ns\": 1355330,\n \"avg_ts\": 19.346371,\n \"stddev_ts\": 0.000991,\n \"samples_ns\": [ 26464684965, 26466366664, 26463684707 ],\n \"samples_ts\": [ 19.3465, 19.3453, 19.3473 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:11:25Z\",\n \"avg_ns\": 12651817639,\n \"stddev_ns\": 721197,\n \"avg_ts\": 10.117123,\n \"stddev_ts\": 0.000570,\n \"samples_ns\": [ 12650995063, 12652228028, 12652229827 ],\n \"samples_ts\": [ 10.1178, 10.1168, 10.1168 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T08:09:39Z", + "avg_ns": 26464912112, + "stddev_ns": 1355330, + "avg_ts": 19.346371, + "stddev_ts": 0.000991, + "samples_ns": [ + 26464684965, + 26466366664, + 26463684707 + ], + "samples_ts": [ + 19.3465, + 19.3453, + 19.3473 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T08:11:25Z", + "avg_ns": 12651817639, + "stddev_ns": 721197, + "avg_ts": 10.117123, + "stddev_ts": 0.00057, + "samples_ns": [ + 12650995063, + 12652228028, + 12652229827 + ], + "samples_ts": [ + 10.1178, + 10.1168, + 10.1168 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 610 + }, + { + "timestamp_utc": "2025-12-09T08:16:24.669350+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:12:04Z\",\n \"avg_ns\": 26455033170,\n \"stddev_ns\": 314869,\n \"avg_ts\": 19.353595,\n \"stddev_ts\": 0.000197,\n \"samples_ns\": [ 26454737790, 26455095711, 26455266010 ],\n \"samples_ts\": [ 19.3538, 19.3535, 19.3534 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:13:50Z\",\n \"avg_ns\": 51401834255,\n \"stddev_ns\": 723132,\n \"avg_ts\": 9.960734,\n \"stddev_ts\": 0.000140,\n \"samples_ns\": [ 51402632665, 51401646772, 51401223328 ],\n \"samples_ts\": [ 9.96058, 9.96077, 9.96085 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T08:12:04Z", + "avg_ns": 26455033170, + "stddev_ns": 314869, + "avg_ts": 19.353595, + "stddev_ts": 0.000197, + "samples_ns": [ + 26454737790, + 26455095711, + 26455266010 + ], + "samples_ts": [ + 19.3538, + 19.3535, + 19.3534 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T08:13:50Z", + "avg_ns": 51401834255, + "stddev_ns": 723132, + "avg_ts": 9.960734, + "stddev_ts": 0.00014, + "samples_ns": [ + 51402632665, + 51401646772, + 51401223328 + ], + "samples_ts": [ + 9.96058, + 9.96077, + 9.96085 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 611 + }, + { + "timestamp_utc": "2025-12-09T08:17:11.419049+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:16:25Z\",\n \"avg_ns\": 3242594963,\n \"stddev_ns\": 589659,\n \"avg_ts\": 39.474558,\n \"stddev_ts\": 0.007145,\n \"samples_ns\": [ 3242745012, 3241947602, 3243092276 ],\n \"samples_ts\": [ 39.4727, 39.4824, 39.4685 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:16:38Z\",\n \"avg_ns\": 10906505745,\n \"stddev_ns\": 3421348,\n \"avg_ts\": 11.736115,\n \"stddev_ts\": 0.003678,\n \"samples_ns\": [ 10906525024, 10903077988, 10909914225 ],\n \"samples_ts\": [ 11.7361, 11.7398, 11.7324 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T08:16:25Z", + "avg_ns": 3242594963, + "stddev_ns": 589659, + "avg_ts": 39.474558, + "stddev_ts": 0.007145, + "samples_ns": [ + 3242745012, + 3241947602, + 3243092276 + ], + "samples_ts": [ + 39.4727, + 39.4824, + 39.4685 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T08:16:38Z", + "avg_ns": 10906505745, + "stddev_ns": 3421348, + "avg_ts": 11.736115, + "stddev_ts": 0.003678, + "samples_ns": [ + 10906525024, + 10903077988, + 10909914225 + ], + "samples_ts": [ + 11.7361, + 11.7398, + 11.7324 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 612 + }, + { + "timestamp_utc": "2025-12-09T08:19:37.635318+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:17:12Z\",\n \"avg_ns\": 3246661858,\n \"stddev_ns\": 332409,\n \"avg_ts\": 39.425110,\n \"stddev_ts\": 0.003977,\n \"samples_ns\": [ 3246901317, 3246795593, 3246288665 ],\n \"samples_ts\": [ 39.4222, 39.4235, 39.4296 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:17:25Z\",\n \"avg_ns\": 44054952084,\n \"stddev_ns\": 2376560,\n \"avg_ts\": 11.621849,\n \"stddev_ts\": 0.000625,\n \"samples_ns\": [ 44056229726, 44052220488, 44056406039 ],\n \"samples_ts\": [ 11.6215, 11.6226, 11.6215 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T08:17:12Z", + "avg_ns": 3246661858, + "stddev_ns": 332409, + "avg_ts": 39.42511, + "stddev_ts": 0.003977, + "samples_ns": [ + 3246901317, + 3246795593, + 3246288665 + ], + "samples_ts": [ + 39.4222, + 39.4235, + 39.4296 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T08:17:25Z", + "avg_ns": 44054952084, + "stddev_ns": 2376560, + "avg_ts": 11.621849, + "stddev_ts": 0.000625, + "samples_ns": [ + 44056229726, + 44052220488, + 44056406039 + ], + "samples_ts": [ + 11.6215, + 11.6226, + 11.6215 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 613 + }, + { + "timestamp_utc": "2025-12-09T08:21:03.945836+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:19:38Z\",\n \"avg_ns\": 13072195690,\n \"stddev_ns\": 1026337,\n \"avg_ts\": 39.167100,\n \"stddev_ts\": 0.003037,\n \"samples_ns\": [ 13072995440, 13072535765, 13071055867 ],\n \"samples_ts\": [ 39.1647, 39.1661, 39.1705 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:20:30Z\",\n \"avg_ns\": 10972280397,\n \"stddev_ns\": 2109690,\n \"avg_ts\": 11.665761,\n \"stddev_ts\": 0.002243,\n \"samples_ns\": [ 10973593015, 10969846851, 10973401325 ],\n \"samples_ts\": [ 11.6644, 11.6683, 11.6646 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T08:19:38Z", + "avg_ns": 13072195690, + "stddev_ns": 1026337, + "avg_ts": 39.1671, + "stddev_ts": 0.003037, + "samples_ns": [ + 13072995440, + 13072535765, + 13071055867 + ], + "samples_ts": [ + 39.1647, + 39.1661, + 39.1705 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T08:20:30Z", + "avg_ns": 10972280397, + "stddev_ns": 2109690, + "avg_ts": 11.665761, + "stddev_ts": 0.002243, + "samples_ns": [ + 10973593015, + 10969846851, + 10973401325 + ], + "samples_ts": [ + 11.6644, + 11.6683, + 11.6646 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 614 + }, + { + "timestamp_utc": "2025-12-09T08:24:09.869104+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:21:04Z\",\n \"avg_ns\": 13067848485,\n \"stddev_ns\": 828829,\n \"avg_ts\": 39.180130,\n \"stddev_ts\": 0.002437,\n \"samples_ns\": [ 13067831940, 13068669542, 13067043975 ],\n \"samples_ts\": [ 39.1802, 39.1777, 39.1825 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:21:57Z\",\n \"avg_ns\": 44177068596,\n \"stddev_ns\": 9271915,\n \"avg_ts\": 11.589724,\n \"stddev_ts\": 0.002431,\n \"samples_ns\": [ 44169922204, 44187539391, 44173744195 ],\n \"samples_ts\": [ 11.5916, 11.587, 11.5906 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T08:21:04Z", + "avg_ns": 13067848485, + "stddev_ns": 828829, + "avg_ts": 39.18013, + "stddev_ts": 0.002437, + "samples_ns": [ + 13067831940, + 13068669542, + 13067043975 + ], + "samples_ts": [ + 39.1802, + 39.1777, + 39.1825 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T08:21:57Z", + "avg_ns": 44177068596, + "stddev_ns": 9271915, + "avg_ts": 11.589724, + "stddev_ts": 0.002431, + "samples_ns": [ + 44169922204, + 44187539391, + 44173744195 + ], + "samples_ts": [ + 11.5916, + 11.587, + 11.5906 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 615 + }, + { + "timestamp_utc": "2025-12-09T08:24:56.643838+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:24:10Z\",\n \"avg_ns\": 3244733190,\n \"stddev_ns\": 128753,\n \"avg_ts\": 39.448544,\n \"stddev_ts\": 0.001221,\n \"samples_ns\": [ 3244636857, 3244725431, 3244837284 ],\n \"samples_ts\": [ 39.4497, 39.4486, 39.4473 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:24:23Z\",\n \"avg_ns\": 10907206829,\n \"stddev_ns\": 1553848,\n \"avg_ts\": 11.735360,\n \"stddev_ts\": 0.001668,\n \"samples_ns\": [ 10908157035, 10905417813, 10908045640 ],\n \"samples_ts\": [ 11.7343, 11.7373, 11.7345 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T08:24:10Z", + "avg_ns": 3244733190, + "stddev_ns": 128753, + "avg_ts": 39.448544, + "stddev_ts": 0.001221, + "samples_ns": [ + 3244636857, + 3244725431, + 3244837284 + ], + "samples_ts": [ + 39.4497, + 39.4486, + 39.4473 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T08:24:23Z", + "avg_ns": 10907206829, + "stddev_ns": 1553848, + "avg_ts": 11.73536, + "stddev_ts": 0.001668, + "samples_ns": [ + 10908157035, + 10905417813, + 10908045640 + ], + "samples_ts": [ + 11.7343, + 11.7373, + 11.7345 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 616 + }, + { + "timestamp_utc": "2025-12-09T08:27:22.914789+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:24:57Z\",\n \"avg_ns\": 3245818901,\n \"stddev_ns\": 328789,\n \"avg_ts\": 39.435349,\n \"stddev_ts\": 0.003934,\n \"samples_ns\": [ 3245975253, 3246034872, 3245446579 ],\n \"samples_ts\": [ 39.4334, 39.4327, 39.4399 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:25:10Z\",\n \"avg_ns\": 44056108569,\n \"stddev_ns\": 4086716,\n \"avg_ts\": 11.621544,\n \"stddev_ts\": 0.001077,\n \"samples_ns\": [ 44052418370, 44060492176, 44055415162 ],\n \"samples_ts\": [ 11.6225, 11.6204, 11.6217 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T08:24:57Z", + "avg_ns": 3245818901, + "stddev_ns": 328789, + "avg_ts": 39.435349, + "stddev_ts": 0.003934, + "samples_ns": [ + 3245975253, + 3246034872, + 3245446579 + ], + "samples_ts": [ + 39.4334, + 39.4327, + 39.4399 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T08:25:10Z", + "avg_ns": 44056108569, + "stddev_ns": 4086716, + "avg_ts": 11.621544, + "stddev_ts": 0.001077, + "samples_ns": [ + 44052418370, + 44060492176, + 44055415162 + ], + "samples_ts": [ + 11.6225, + 11.6204, + 11.6217 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 617 + }, + { + "timestamp_utc": "2025-12-09T08:28:49.086456+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:27:23Z\",\n \"avg_ns\": 13085992678,\n \"stddev_ns\": 1136135,\n \"avg_ts\": 39.125805,\n \"stddev_ts\": 0.003397,\n \"samples_ns\": [ 13086997615, 13086220539, 13084759880 ],\n \"samples_ts\": [ 39.1228, 39.1251, 39.1295 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:28:16Z\",\n \"avg_ns\": 10908745742,\n \"stddev_ns\": 1901902,\n \"avg_ts\": 11.733705,\n \"stddev_ts\": 0.002046,\n \"samples_ns\": [ 10908625410, 10906906863, 10910704953 ],\n \"samples_ts\": [ 11.7338, 11.7357, 11.7316 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T08:27:23Z", + "avg_ns": 13085992678, + "stddev_ns": 1136135, + "avg_ts": 39.125805, + "stddev_ts": 0.003397, + "samples_ns": [ + 13086997615, + 13086220539, + 13084759880 + ], + "samples_ts": [ + 39.1228, + 39.1251, + 39.1295 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T08:28:16Z", + "avg_ns": 10908745742, + "stddev_ns": 1901902, + "avg_ts": 11.733705, + "stddev_ts": 0.002046, + "samples_ns": [ + 10908625410, + 10906906863, + 10910704953 + ], + "samples_ts": [ + 11.7338, + 11.7357, + 11.7316 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 618 + }, + { + "timestamp_utc": "2025-12-09T08:31:54.712548+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:28:50Z\",\n \"avg_ns\": 13088945973,\n \"stddev_ns\": 582598,\n \"avg_ts\": 39.116977,\n \"stddev_ts\": 0.001673,\n \"samples_ns\": [ 13089566305, 13088478854, 13088792762 ],\n \"samples_ts\": [ 39.1151, 39.1184, 39.1174 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:29:42Z\",\n \"avg_ns\": 44044474021,\n \"stddev_ns\": 6439614,\n \"avg_ts\": 11.624614,\n \"stddev_ts\": 0.001699,\n \"samples_ns\": [ 44051886996, 44040308792, 44041226276 ],\n \"samples_ts\": [ 11.6227, 11.6257, 11.6255 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T08:28:50Z", + "avg_ns": 13088945973, + "stddev_ns": 582598, + "avg_ts": 39.116977, + "stddev_ts": 0.001673, + "samples_ns": [ + 13089566305, + 13088478854, + 13088792762 + ], + "samples_ts": [ + 39.1151, + 39.1184, + 39.1174 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T08:29:42Z", + "avg_ns": 44044474021, + "stddev_ns": 6439614, + "avg_ts": 11.624614, + "stddev_ts": 0.001699, + "samples_ns": [ + 44051886996, + 44040308792, + 44041226276 + ], + "samples_ts": [ + 11.6227, + 11.6257, + 11.6255 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 619 + }, + { + "timestamp_utc": "2025-12-09T08:32:41.537941+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:31:55Z\",\n \"avg_ns\": 3241817954,\n \"stddev_ns\": 415737,\n \"avg_ts\": 39.484019,\n \"stddev_ts\": 0.005063,\n \"samples_ns\": [ 3241479905, 3242282156, 3241691801 ],\n \"samples_ts\": [ 39.4881, 39.4784, 39.4856 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:32:08Z\",\n \"avg_ns\": 10913749054,\n \"stddev_ns\": 2383562,\n \"avg_ts\": 11.728326,\n \"stddev_ts\": 0.002556,\n \"samples_ns\": [ 10912249031, 10912506081, 10916492052 ],\n \"samples_ts\": [ 11.7299, 11.7297, 11.7254 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T08:31:55Z", + "avg_ns": 3241817954, + "stddev_ns": 415737, + "avg_ts": 39.484019, + "stddev_ts": 0.005063, + "samples_ns": [ + 3241479905, + 3242282156, + 3241691801 + ], + "samples_ts": [ + 39.4881, + 39.4784, + 39.4856 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T08:32:08Z", + "avg_ns": 10913749054, + "stddev_ns": 2383562, + "avg_ts": 11.728326, + "stddev_ts": 0.002556, + "samples_ns": [ + 10912249031, + 10912506081, + 10916492052 + ], + "samples_ts": [ + 11.7299, + 11.7297, + 11.7254 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 620 + }, + { + "timestamp_utc": "2025-12-09T08:35:07.704215+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:32:42Z\",\n \"avg_ns\": 3243087914,\n \"stddev_ns\": 398416,\n \"avg_ts\": 39.468557,\n \"stddev_ts\": 0.004799,\n \"samples_ns\": [ 3243101278, 3242687077, 3243475388 ],\n \"samples_ts\": [ 39.4684, 39.4734, 39.4638 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:32:55Z\",\n \"avg_ns\": 44033548013,\n \"stddev_ns\": 2407896,\n \"avg_ts\": 11.627498,\n \"stddev_ts\": 0.000633,\n \"samples_ns\": [ 44033598508, 44035921103, 44031124429 ],\n \"samples_ts\": [ 11.6275, 11.6269, 11.6281 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T08:32:42Z", + "avg_ns": 3243087914, + "stddev_ns": 398416, + "avg_ts": 39.468557, + "stddev_ts": 0.004799, + "samples_ns": [ + 3243101278, + 3242687077, + 3243475388 + ], + "samples_ts": [ + 39.4684, + 39.4734, + 39.4638 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T08:32:55Z", + "avg_ns": 44033548013, + "stddev_ns": 2407896, + "avg_ts": 11.627498, + "stddev_ts": 0.000633, + "samples_ns": [ + 44033598508, + 44035921103, + 44031124429 + ], + "samples_ts": [ + 11.6275, + 11.6269, + 11.6281 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 621 + }, + { + "timestamp_utc": "2025-12-09T08:36:35.887514+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:35:08Z\",\n \"avg_ns\": 13529582255,\n \"stddev_ns\": 572716,\n \"avg_ts\": 37.843001,\n \"stddev_ts\": 0.001534,\n \"samples_ns\": [ 13529792442, 13528959658, 13529994667 ],\n \"samples_ts\": [ 37.8424, 37.8447, 37.8418 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:36:02Z\",\n \"avg_ns\": 10988688743,\n \"stddev_ns\": 2856768,\n \"avg_ts\": 11.648342,\n \"stddev_ts\": 0.003028,\n \"samples_ns\": [ 10988377494, 10991688391, 10986000344 ],\n \"samples_ts\": [ 11.6487, 11.6452, 11.6512 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T08:35:08Z", + "avg_ns": 13529582255, + "stddev_ns": 572716, + "avg_ts": 37.843001, + "stddev_ts": 0.001534, + "samples_ns": [ + 13529792442, + 13528959658, + 13529994667 + ], + "samples_ts": [ + 37.8424, + 37.8447, + 37.8418 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T08:36:02Z", + "avg_ns": 10988688743, + "stddev_ns": 2856768, + "avg_ts": 11.648342, + "stddev_ts": 0.003028, + "samples_ns": [ + 10988377494, + 10991688391, + 10986000344 + ], + "samples_ts": [ + 11.6487, + 11.6452, + 11.6512 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 622 + }, + { + "timestamp_utc": "2025-12-09T08:39:43.274643+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:36:36Z\",\n \"avg_ns\": 13507689127,\n \"stddev_ns\": 485237,\n \"avg_ts\": 37.904337,\n \"stddev_ts\": 0.001322,\n \"samples_ns\": [ 13507818321, 13507166896, 13508082165 ],\n \"samples_ts\": [ 37.904, 37.9058, 37.9032 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:37:30Z\",\n \"avg_ns\": 44081123009,\n \"stddev_ns\": 1934827,\n \"avg_ts\": 11.614949,\n \"stddev_ts\": 0.000507,\n \"samples_ns\": [ 44081150223, 44079186145, 44083032660 ],\n \"samples_ts\": [ 11.6149, 11.6155, 11.6144 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T08:36:36Z", + "avg_ns": 13507689127, + "stddev_ns": 485237, + "avg_ts": 37.904337, + "stddev_ts": 0.001322, + "samples_ns": [ + 13507818321, + 13507166896, + 13508082165 + ], + "samples_ts": [ + 37.904, + 37.9058, + 37.9032 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T08:37:30Z", + "avg_ns": 44081123009, + "stddev_ns": 1934827, + "avg_ts": 11.614949, + "stddev_ts": 0.000507, + "samples_ns": [ + 44081150223, + 44079186145, + 44083032660 + ], + "samples_ts": [ + 11.6149, + 11.6155, + 11.6144 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 623 + }, + { + "timestamp_utc": "2025-12-09T08:40:30.052621+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:39:44Z\",\n \"avg_ns\": 3242454579,\n \"stddev_ns\": 231517,\n \"avg_ts\": 39.476266,\n \"stddev_ts\": 0.002819,\n \"samples_ns\": [ 3242526496, 3242195638, 3242641603 ],\n \"samples_ts\": [ 39.4754, 39.4794, 39.474 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:39:57Z\",\n \"avg_ns\": 10905052718,\n \"stddev_ns\": 2194196,\n \"avg_ts\": 11.737679,\n \"stddev_ts\": 0.002362,\n \"samples_ns\": [ 10906776421, 10905799013, 10902582720 ],\n \"samples_ts\": [ 11.7358, 11.7369, 11.7403 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T08:39:44Z", + "avg_ns": 3242454579, + "stddev_ns": 231517, + "avg_ts": 39.476266, + "stddev_ts": 0.002819, + "samples_ns": [ + 3242526496, + 3242195638, + 3242641603 + ], + "samples_ts": [ + 39.4754, + 39.4794, + 39.474 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T08:39:57Z", + "avg_ns": 10905052718, + "stddev_ns": 2194196, + "avg_ts": 11.737679, + "stddev_ts": 0.002362, + "samples_ns": [ + 10906776421, + 10905799013, + 10902582720 + ], + "samples_ts": [ + 11.7358, + 11.7369, + 11.7403 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 624 + }, + { + "timestamp_utc": "2025-12-09T08:42:56.340766+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:40:30Z\",\n \"avg_ns\": 3246810670,\n \"stddev_ns\": 401638,\n \"avg_ts\": 39.423303,\n \"stddev_ts\": 0.004877,\n \"samples_ns\": [ 3246367772, 3247151259, 3246912979 ],\n \"samples_ts\": [ 39.4287, 39.4192, 39.4221 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:40:43Z\",\n \"avg_ns\": 44069128854,\n \"stddev_ns\": 3034030,\n \"avg_ts\": 11.618110,\n \"stddev_ts\": 0.000800,\n \"samples_ns\": [ 44072536304, 44066719929, 44068130329 ],\n \"samples_ts\": [ 11.6172, 11.6187, 11.6184 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T08:40:30Z", + "avg_ns": 3246810670, + "stddev_ns": 401638, + "avg_ts": 39.423303, + "stddev_ts": 0.004877, + "samples_ns": [ + 3246367772, + 3247151259, + 3246912979 + ], + "samples_ts": [ + 39.4287, + 39.4192, + 39.4221 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T08:40:43Z", + "avg_ns": 44069128854, + "stddev_ns": 3034030, + "avg_ts": 11.61811, + "stddev_ts": 0.0008, + "samples_ns": [ + 44072536304, + 44066719929, + 44068130329 + ], + "samples_ts": [ + 11.6172, + 11.6187, + 11.6184 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 625 + }, + { + "timestamp_utc": "2025-12-09T08:44:22.462043+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:42:57Z\",\n \"avg_ns\": 13065897473,\n \"stddev_ns\": 409820,\n \"avg_ts\": 39.185980,\n \"stddev_ts\": 0.001129,\n \"samples_ns\": [ 13066237011, 13065962985, 13065492425 ],\n \"samples_ts\": [ 39.185, 39.1858, 39.1872 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:43:49Z\",\n \"avg_ns\": 10903833253,\n \"stddev_ns\": 5244701,\n \"avg_ts\": 11.738993,\n \"stddev_ts\": 0.005646,\n \"samples_ns\": [ 10908021181, 10905526365, 10897952214 ],\n \"samples_ts\": [ 11.7345, 11.7372, 11.7453 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T08:42:57Z", + "avg_ns": 13065897473, + "stddev_ns": 409820, + "avg_ts": 39.18598, + "stddev_ts": 0.001129, + "samples_ns": [ + 13066237011, + 13065962985, + 13065492425 + ], + "samples_ts": [ + 39.185, + 39.1858, + 39.1872 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T08:43:49Z", + "avg_ns": 10903833253, + "stddev_ns": 5244701, + "avg_ts": 11.738993, + "stddev_ts": 0.005646, + "samples_ns": [ + 10908021181, + 10905526365, + 10897952214 + ], + "samples_ts": [ + 11.7345, + 11.7372, + 11.7453 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 626 + }, + { + "timestamp_utc": "2025-12-09T08:47:28.004756+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:44:23Z\",\n \"avg_ns\": 13075853783,\n \"stddev_ns\": 688801,\n \"avg_ts\": 39.156143,\n \"stddev_ts\": 0.002034,\n \"samples_ns\": [ 13076440158, 13075109492, 13076011700 ],\n \"samples_ts\": [ 39.1544, 39.1584, 39.1557 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:45:15Z\",\n \"avg_ns\": 44050367800,\n \"stddev_ns\": 965148,\n \"avg_ts\": 11.623058,\n \"stddev_ts\": 0.000255,\n \"samples_ns\": [ 44049255420, 44050982894, 44050865086 ],\n \"samples_ts\": [ 11.6234, 11.6229, 11.6229 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T08:44:23Z", + "avg_ns": 13075853783, + "stddev_ns": 688801, + "avg_ts": 39.156143, + "stddev_ts": 0.002034, + "samples_ns": [ + 13076440158, + 13075109492, + 13076011700 + ], + "samples_ts": [ + 39.1544, + 39.1584, + 39.1557 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T08:45:15Z", + "avg_ns": 44050367800, + "stddev_ns": 965148, + "avg_ts": 11.623058, + "stddev_ts": 0.000255, + "samples_ns": [ + 44049255420, + 44050982894, + 44050865086 + ], + "samples_ts": [ + 11.6234, + 11.6229, + 11.6229 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 627 + }, + { + "timestamp_utc": "2025-12-09T08:48:15.101813+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:47:28Z\",\n \"avg_ns\": 3243301465,\n \"stddev_ns\": 343637,\n \"avg_ts\": 39.465958,\n \"stddev_ts\": 0.004182,\n \"samples_ns\": [ 3243643606, 3242956351, 3243304438 ],\n \"samples_ts\": [ 39.4618, 39.4702, 39.4659 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:47:41Z\",\n \"avg_ns\": 10988299830,\n \"stddev_ns\": 24908433,\n \"avg_ts\": 11.648794,\n \"stddev_ts\": 0.026440,\n \"samples_ns\": [ 10959545243, 11003214092, 11002140157 ],\n \"samples_ts\": [ 11.6793, 11.633, 11.6341 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T08:47:28Z", + "avg_ns": 3243301465, + "stddev_ns": 343637, + "avg_ts": 39.465958, + "stddev_ts": 0.004182, + "samples_ns": [ + 3243643606, + 3242956351, + 3243304438 + ], + "samples_ts": [ + 39.4618, + 39.4702, + 39.4659 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T08:47:41Z", + "avg_ns": 10988299830, + "stddev_ns": 24908433, + "avg_ts": 11.648794, + "stddev_ts": 0.02644, + "samples_ns": [ + 10959545243, + 11003214092, + 11002140157 + ], + "samples_ts": [ + 11.6793, + 11.633, + 11.6341 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 628 + }, + { + "timestamp_utc": "2025-12-09T08:50:41.684106+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:48:15Z\",\n \"avg_ns\": 3244831593,\n \"stddev_ns\": 113519,\n \"avg_ts\": 39.447348,\n \"stddev_ts\": 0.001194,\n \"samples_ns\": [ 3244944349, 3244764895, 3244785536 ],\n \"samples_ts\": [ 39.446, 39.4482, 39.4479 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:48:28Z\",\n \"avg_ns\": 44176559683,\n \"stddev_ns\": 14481265,\n \"avg_ts\": 11.589858,\n \"stddev_ts\": 0.003799,\n \"samples_ns\": [ 44189432221, 44179361490, 44160885340 ],\n \"samples_ts\": [ 11.5865, 11.5891, 11.594 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T08:48:15Z", + "avg_ns": 3244831593, + "stddev_ns": 113519, + "avg_ts": 39.447348, + "stddev_ts": 0.001194, + "samples_ns": [ + 3244944349, + 3244764895, + 3244785536 + ], + "samples_ts": [ + 39.446, + 39.4482, + 39.4479 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T08:48:28Z", + "avg_ns": 44176559683, + "stddev_ns": 14481265, + "avg_ts": 11.589858, + "stddev_ts": 0.003799, + "samples_ns": [ + 44189432221, + 44179361490, + 44160885340 + ], + "samples_ts": [ + 11.5865, + 11.5891, + 11.594 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 629 + }, + { + "timestamp_utc": "2025-12-09T08:52:07.808789+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:50:42Z\",\n \"avg_ns\": 13078923300,\n \"stddev_ns\": 1041923,\n \"avg_ts\": 39.146953,\n \"stddev_ts\": 0.003100,\n \"samples_ns\": [ 13078506802, 13080102334, 13078160765 ],\n \"samples_ts\": [ 39.1482, 39.1434, 39.1492 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:51:34Z\",\n \"avg_ns\": 10914142969,\n \"stddev_ns\": 1325833,\n \"avg_ts\": 11.727902,\n \"stddev_ts\": 0.001425,\n \"samples_ns\": [ 10913784748, 10913033052, 10915611107 ],\n \"samples_ts\": [ 11.7283, 11.7291, 11.7263 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T08:50:42Z", + "avg_ns": 13078923300, + "stddev_ns": 1041923, + "avg_ts": 39.146953, + "stddev_ts": 0.0031, + "samples_ns": [ + 13078506802, + 13080102334, + 13078160765 + ], + "samples_ts": [ + 39.1482, + 39.1434, + 39.1492 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T08:51:34Z", + "avg_ns": 10914142969, + "stddev_ns": 1325833, + "avg_ts": 11.727902, + "stddev_ts": 0.001425, + "samples_ns": [ + 10913784748, + 10913033052, + 10915611107 + ], + "samples_ts": [ + 11.7283, + 11.7291, + 11.7263 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 630 + }, + { + "timestamp_utc": "2025-12-09T08:55:13.677428+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:52:08Z\",\n \"avg_ns\": 13152663707,\n \"stddev_ns\": 553756,\n \"avg_ts\": 38.927476,\n \"stddev_ts\": 0.001639,\n \"samples_ns\": [ 13152076428, 13152738318, 13153176375 ],\n \"samples_ts\": [ 38.9292, 38.9273, 38.926 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:53:01Z\",\n \"avg_ns\": 44060142512,\n \"stddev_ns\": 7879969,\n \"avg_ts\": 11.620480,\n \"stddev_ts\": 0.002077,\n \"samples_ns\": [ 44051229419, 44063042344, 44066155775 ],\n \"samples_ts\": [ 11.6228, 11.6197, 11.6189 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T08:52:08Z", + "avg_ns": 13152663707, + "stddev_ns": 553756, + "avg_ts": 38.927476, + "stddev_ts": 0.001639, + "samples_ns": [ + 13152076428, + 13152738318, + 13153176375 + ], + "samples_ts": [ + 38.9292, + 38.9273, + 38.926 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T08:53:01Z", + "avg_ns": 44060142512, + "stddev_ns": 7879969, + "avg_ts": 11.62048, + "stddev_ts": 0.002077, + "samples_ns": [ + 44051229419, + 44063042344, + 44066155775 + ], + "samples_ts": [ + 11.6228, + 11.6197, + 11.6189 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 631 + }, + { + "timestamp_utc": "2025-12-09T08:56:00.667770+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:55:14Z\",\n \"avg_ns\": 3246220793,\n \"stddev_ns\": 316299,\n \"avg_ts\": 39.430467,\n \"stddev_ts\": 0.003715,\n \"samples_ns\": [ 3246267697, 3246500497, 3245894187 ],\n \"samples_ts\": [ 39.4299, 39.4271, 39.4344 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:55:27Z\",\n \"avg_ns\": 10978083848,\n \"stddev_ns\": 1855686,\n \"avg_ts\": 11.659594,\n \"stddev_ts\": 0.001968,\n \"samples_ns\": [ 10979043450, 10975948159, 10979259936 ],\n \"samples_ts\": [ 11.6586, 11.6619, 11.6583 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T08:55:14Z", + "avg_ns": 3246220793, + "stddev_ns": 316299, + "avg_ts": 39.430467, + "stddev_ts": 0.003715, + "samples_ns": [ + 3246267697, + 3246500497, + 3245894187 + ], + "samples_ts": [ + 39.4299, + 39.4271, + 39.4344 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T08:55:27Z", + "avg_ns": 10978083848, + "stddev_ns": 1855686, + "avg_ts": 11.659594, + "stddev_ts": 0.001968, + "samples_ns": [ + 10979043450, + 10975948159, + 10979259936 + ], + "samples_ts": [ + 11.6586, + 11.6619, + 11.6583 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 632 + }, + { + "timestamp_utc": "2025-12-09T08:58:27.946280+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:56:01Z\",\n \"avg_ns\": 3242090190,\n \"stddev_ns\": 626702,\n \"avg_ts\": 39.480704,\n \"stddev_ts\": 0.007601,\n \"samples_ns\": [ 3241407690, 3242231043, 3242631838 ],\n \"samples_ts\": [ 39.489, 39.479, 39.4741 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:56:14Z\",\n \"avg_ns\": 44412905771,\n \"stddev_ns\": 1775307,\n \"avg_ts\": 11.528181,\n \"stddev_ts\": 0.000458,\n \"samples_ns\": [ 44411561442, 44414901525, 44412254347 ],\n \"samples_ts\": [ 11.5285, 11.5277, 11.5283 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T08:56:01Z", + "avg_ns": 3242090190, + "stddev_ns": 626702, + "avg_ts": 39.480704, + "stddev_ts": 0.007601, + "samples_ns": [ + 3241407690, + 3242231043, + 3242631838 + ], + "samples_ts": [ + 39.489, + 39.479, + 39.4741 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T08:56:14Z", + "avg_ns": 44412905771, + "stddev_ns": 1775307, + "avg_ts": 11.528181, + "stddev_ts": 0.000458, + "samples_ns": [ + 44411561442, + 44414901525, + 44412254347 + ], + "samples_ts": [ + 11.5285, + 11.5277, + 11.5283 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 633 + }, + { + "timestamp_utc": "2025-12-09T08:59:55.741055+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:58:28Z\",\n \"avg_ns\": 13496720361,\n \"stddev_ns\": 2720257,\n \"avg_ts\": 37.935143,\n \"stddev_ts\": 0.007631,\n \"samples_ns\": [ 13499807752, 13495649768, 13494703565 ],\n \"samples_ts\": [ 37.9265, 37.9382, 37.9408 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:59:22Z\",\n \"avg_ns\": 10907698514,\n \"stddev_ns\": 2945071,\n \"avg_ts\": 11.734832,\n \"stddev_ts\": 0.003164,\n \"samples_ns\": [ 10905884659, 10906118662, 10911092223 ],\n \"samples_ts\": [ 11.7368, 11.7365, 11.7312 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T08:58:28Z", + "avg_ns": 13496720361, + "stddev_ns": 2720257, + "avg_ts": 37.935143, + "stddev_ts": 0.007631, + "samples_ns": [ + 13499807752, + 13495649768, + 13494703565 + ], + "samples_ts": [ + 37.9265, + 37.9382, + 37.9408 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T08:59:22Z", + "avg_ns": 10907698514, + "stddev_ns": 2945071, + "avg_ts": 11.734832, + "stddev_ts": 0.003164, + "samples_ns": [ + 10905884659, + 10906118662, + 10911092223 + ], + "samples_ts": [ + 11.7368, + 11.7365, + 11.7312 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 634 + }, + { + "timestamp_utc": "2025-12-09T09:03:03.027466+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T08:59:56Z\",\n \"avg_ns\": 13516548650,\n \"stddev_ns\": 620540,\n \"avg_ts\": 37.879492,\n \"stddev_ts\": 0.001708,\n \"samples_ns\": [ 13516118032, 13517246121, 13516281798 ],\n \"samples_ts\": [ 37.8807, 37.8775, 37.8802 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:00:50Z\",\n \"avg_ns\": 44049700287,\n \"stddev_ns\": 5235641,\n \"avg_ts\": 11.623235,\n \"stddev_ts\": 0.001379,\n \"samples_ns\": [ 44055651298, 44045851310, 44047598255 ],\n \"samples_ts\": [ 11.6217, 11.6243, 11.6238 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T08:59:56Z", + "avg_ns": 13516548650, + "stddev_ns": 620540, + "avg_ts": 37.879492, + "stddev_ts": 0.001708, + "samples_ns": [ + 13516118032, + 13517246121, + 13516281798 + ], + "samples_ts": [ + 37.8807, + 37.8775, + 37.8802 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T09:00:50Z", + "avg_ns": 44049700287, + "stddev_ns": 5235641, + "avg_ts": 11.623235, + "stddev_ts": 0.001379, + "samples_ns": [ + 44055651298, + 44045851310, + 44047598255 + ], + "samples_ts": [ + 11.6217, + 11.6243, + 11.6238 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 635 + }, + { + "timestamp_utc": "2025-12-09T09:03:49.792565+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:03:03Z\",\n \"avg_ns\": 3244128190,\n \"stddev_ns\": 268380,\n \"avg_ts\": 39.455901,\n \"stddev_ts\": 0.003264,\n \"samples_ns\": [ 3244267212, 3243818819, 3244298539 ],\n \"samples_ts\": [ 39.4542, 39.4597, 39.4538 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:03:16Z\",\n \"avg_ns\": 10914802198,\n \"stddev_ns\": 1212761,\n \"avg_ts\": 11.727194,\n \"stddev_ts\": 0.001293,\n \"samples_ns\": [ 10915518530, 10913412474, 10915475592 ],\n \"samples_ts\": [ 11.7264, 11.7287, 11.7265 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T09:03:03Z", + "avg_ns": 3244128190, + "stddev_ns": 268380, + "avg_ts": 39.455901, + "stddev_ts": 0.003264, + "samples_ns": [ + 3244267212, + 3243818819, + 3244298539 + ], + "samples_ts": [ + 39.4542, + 39.4597, + 39.4538 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T09:03:16Z", + "avg_ns": 10914802198, + "stddev_ns": 1212761, + "avg_ts": 11.727194, + "stddev_ts": 0.001293, + "samples_ns": [ + 10915518530, + 10913412474, + 10915475592 + ], + "samples_ts": [ + 11.7264, + 11.7287, + 11.7265 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 636 + }, + { + "timestamp_utc": "2025-12-09T09:06:15.923837+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:03:50Z\",\n \"avg_ns\": 3247609957,\n \"stddev_ns\": 366688,\n \"avg_ts\": 39.413600,\n \"stddev_ts\": 0.004341,\n \"samples_ns\": [ 3247320415, 3247499603, 3248009855 ],\n \"samples_ts\": [ 39.4171, 39.4149, 39.4087 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:04:03Z\",\n \"avg_ns\": 44015382583,\n \"stddev_ns\": 594827,\n \"avg_ts\": 11.632297,\n \"stddev_ts\": 0.000147,\n \"samples_ns\": [ 44015099468, 44015024453, 44016023829 ],\n \"samples_ts\": [ 11.6324, 11.6324, 11.6321 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T09:03:50Z", + "avg_ns": 3247609957, + "stddev_ns": 366688, + "avg_ts": 39.4136, + "stddev_ts": 0.004341, + "samples_ns": [ + 3247320415, + 3247499603, + 3248009855 + ], + "samples_ts": [ + 39.4171, + 39.4149, + 39.4087 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T09:04:03Z", + "avg_ns": 44015382583, + "stddev_ns": 594827, + "avg_ts": 11.632297, + "stddev_ts": 0.000147, + "samples_ns": [ + 44015099468, + 44015024453, + 44016023829 + ], + "samples_ts": [ + 11.6324, + 11.6324, + 11.6321 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 637 + }, + { + "timestamp_utc": "2025-12-09T09:07:42.013812+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:06:16Z\",\n \"avg_ns\": 13062590838,\n \"stddev_ns\": 1516214,\n \"avg_ts\": 39.195900,\n \"stddev_ts\": 0.004550,\n \"samples_ns\": [ 13064146172, 13061117033, 13062509309 ],\n \"samples_ts\": [ 39.1912, 39.2003, 39.1961 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:07:09Z\",\n \"avg_ns\": 10921865597,\n \"stddev_ns\": 1880108,\n \"avg_ts\": 11.719610,\n \"stddev_ts\": 0.002014,\n \"samples_ns\": [ 10919717856, 10923193025, 10922685911 ],\n \"samples_ts\": [ 11.7219, 11.7182, 11.7187 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T09:06:16Z", + "avg_ns": 13062590838, + "stddev_ns": 1516214, + "avg_ts": 39.1959, + "stddev_ts": 0.00455, + "samples_ns": [ + 13064146172, + 13061117033, + 13062509309 + ], + "samples_ts": [ + 39.1912, + 39.2003, + 39.1961 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T09:07:09Z", + "avg_ns": 10921865597, + "stddev_ns": 1880108, + "avg_ts": 11.71961, + "stddev_ts": 0.002014, + "samples_ns": [ + 10919717856, + 10923193025, + 10922685911 + ], + "samples_ts": [ + 11.7219, + 11.7182, + 11.7187 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 638 + }, + { + "timestamp_utc": "2025-12-09T09:10:49.040886+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:07:42Z\",\n \"avg_ns\": 13166128691,\n \"stddev_ns\": 129705443,\n \"avg_ts\": 38.890169,\n \"stddev_ts\": 0.381291,\n \"samples_ns\": [ 13313302178, 13116592925, 13068490972 ],\n \"samples_ts\": [ 38.4578, 39.0345, 39.1782 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:08:35Z\",\n \"avg_ns\": 44385269541,\n \"stddev_ns\": 5195581,\n \"avg_ts\": 11.535359,\n \"stddev_ts\": 0.001349,\n \"samples_ns\": [ 44381811756, 44382757860, 44391239008 ],\n \"samples_ts\": [ 11.5363, 11.536, 11.5338 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T09:07:42Z", + "avg_ns": 13166128691, + "stddev_ns": 129705443, + "avg_ts": 38.890169, + "stddev_ts": 0.381291, + "samples_ns": [ + 13313302178, + 13116592925, + 13068490972 + ], + "samples_ts": [ + 38.4578, + 39.0345, + 39.1782 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T09:08:35Z", + "avg_ns": 44385269541, + "stddev_ns": 5195581, + "avg_ts": 11.535359, + "stddev_ts": 0.001349, + "samples_ns": [ + 44381811756, + 44382757860, + 44391239008 + ], + "samples_ts": [ + 11.5363, + 11.536, + 11.5338 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 639 + }, + { + "timestamp_utc": "2025-12-09T09:11:35.807518+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:10:49Z\",\n \"avg_ns\": 3244987390,\n \"stddev_ns\": 316786,\n \"avg_ts\": 39.445454,\n \"stddev_ts\": 0.003851,\n \"samples_ns\": [ 3244692487, 3245322265, 3244947418 ],\n \"samples_ts\": [ 39.449, 39.4414, 39.4459 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:11:02Z\",\n \"avg_ns\": 10906837667,\n \"stddev_ns\": 1280982,\n \"avg_ts\": 11.735757,\n \"stddev_ts\": 0.001369,\n \"samples_ns\": [ 10907917360, 10907160840, 10905434803 ],\n \"samples_ts\": [ 11.7346, 11.7354, 11.7373 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T09:10:49Z", + "avg_ns": 3244987390, + "stddev_ns": 316786, + "avg_ts": 39.445454, + "stddev_ts": 0.003851, + "samples_ns": [ + 3244692487, + 3245322265, + 3244947418 + ], + "samples_ts": [ + 39.449, + 39.4414, + 39.4459 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T09:11:02Z", + "avg_ns": 10906837667, + "stddev_ns": 1280982, + "avg_ts": 11.735757, + "stddev_ts": 0.001369, + "samples_ns": [ + 10907917360, + 10907160840, + 10905434803 + ], + "samples_ts": [ + 11.7346, + 11.7354, + 11.7373 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 640 + }, + { + "timestamp_utc": "2025-12-09T09:14:02.188717+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:11:36Z\",\n \"avg_ns\": 3244526844,\n \"stddev_ns\": 316480,\n \"avg_ts\": 39.451053,\n \"stddev_ts\": 0.003721,\n \"samples_ns\": [ 3244528657, 3244831991, 3244219886 ],\n \"samples_ts\": [ 39.451, 39.4473, 39.4548 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:11:49Z\",\n \"avg_ns\": 44102325520,\n \"stddev_ns\": 22706293,\n \"avg_ts\": 11.609367,\n \"stddev_ts\": 0.005978,\n \"samples_ns\": [ 44105168700, 44123476325, 44078331535 ],\n \"samples_ts\": [ 11.6086, 11.6038, 11.6157 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T09:11:36Z", + "avg_ns": 3244526844, + "stddev_ns": 316480, + "avg_ts": 39.451053, + "stddev_ts": 0.003721, + "samples_ns": [ + 3244528657, + 3244831991, + 3244219886 + ], + "samples_ts": [ + 39.451, + 39.4473, + 39.4548 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T09:11:49Z", + "avg_ns": 44102325520, + "stddev_ns": 22706293, + "avg_ts": 11.609367, + "stddev_ts": 0.005978, + "samples_ns": [ + 44105168700, + 44123476325, + 44078331535 + ], + "samples_ts": [ + 11.6086, + 11.6038, + 11.6157 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 641 + }, + { + "timestamp_utc": "2025-12-09T09:15:28.329072+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:14:03Z\",\n \"avg_ns\": 13074068898,\n \"stddev_ns\": 671662,\n \"avg_ts\": 39.161489,\n \"stddev_ts\": 0.001953,\n \"samples_ns\": [ 13073741142, 13074819645, 13073645909 ],\n \"samples_ts\": [ 39.1625, 39.1592, 39.1628 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:14:55Z\",\n \"avg_ns\": 10916274677,\n \"stddev_ns\": 2005368,\n \"avg_ts\": 11.725612,\n \"stddev_ts\": 0.002148,\n \"samples_ns\": [ 10915858986, 10918449773, 10914515274 ],\n \"samples_ts\": [ 11.7261, 11.7233, 11.7275 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T09:14:03Z", + "avg_ns": 13074068898, + "stddev_ns": 671662, + "avg_ts": 39.161489, + "stddev_ts": 0.001953, + "samples_ns": [ + 13073741142, + 13074819645, + 13073645909 + ], + "samples_ts": [ + 39.1625, + 39.1592, + 39.1628 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T09:14:55Z", + "avg_ns": 10916274677, + "stddev_ns": 2005368, + "avg_ts": 11.725612, + "stddev_ts": 0.002148, + "samples_ns": [ + 10915858986, + 10918449773, + 10914515274 + ], + "samples_ts": [ + 11.7261, + 11.7233, + 11.7275 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 642 + }, + { + "timestamp_utc": "2025-12-09T09:18:33.748224+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:15:29Z\",\n \"avg_ns\": 13082197992,\n \"stddev_ns\": 558457,\n \"avg_ts\": 39.137154,\n \"stddev_ts\": 0.001599,\n \"samples_ns\": [ 13082089485, 13082778440, 13081726053 ],\n \"samples_ts\": [ 39.1375, 39.1354, 39.1386 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:16:21Z\",\n \"avg_ns\": 44007551935,\n \"stddev_ns\": 3664924,\n \"avg_ts\": 11.634367,\n \"stddev_ts\": 0.000969,\n \"samples_ns\": [ 44010903167, 44003638327, 44008114311 ],\n \"samples_ts\": [ 11.6335, 11.6354, 11.6342 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T09:15:29Z", + "avg_ns": 13082197992, + "stddev_ns": 558457, + "avg_ts": 39.137154, + "stddev_ts": 0.001599, + "samples_ns": [ + 13082089485, + 13082778440, + 13081726053 + ], + "samples_ts": [ + 39.1375, + 39.1354, + 39.1386 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T09:16:21Z", + "avg_ns": 44007551935, + "stddev_ns": 3664924, + "avg_ts": 11.634367, + "stddev_ts": 0.000969, + "samples_ns": [ + 44010903167, + 44003638327, + 44008114311 + ], + "samples_ts": [ + 11.6335, + 11.6354, + 11.6342 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 643 + }, + { + "timestamp_utc": "2025-12-09T09:19:20.591922+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:18:34Z\",\n \"avg_ns\": 3244965263,\n \"stddev_ns\": 459161,\n \"avg_ts\": 39.445723,\n \"stddev_ts\": 0.005581,\n \"samples_ns\": [ 3244868183, 3245465202, 3244562404 ],\n \"samples_ts\": [ 39.4469, 39.4396, 39.4506 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:18:47Z\",\n \"avg_ns\": 10919375595,\n \"stddev_ns\": 3033078,\n \"avg_ts\": 11.722283,\n \"stddev_ts\": 0.003253,\n \"samples_ns\": [ 10920079175, 10921991374, 10916056238 ],\n \"samples_ts\": [ 11.7215, 11.7195, 11.7258 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T09:18:34Z", + "avg_ns": 3244965263, + "stddev_ns": 459161, + "avg_ts": 39.445723, + "stddev_ts": 0.005581, + "samples_ns": [ + 3244868183, + 3245465202, + 3244562404 + ], + "samples_ts": [ + 39.4469, + 39.4396, + 39.4506 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T09:18:47Z", + "avg_ns": 10919375595, + "stddev_ns": 3033078, + "avg_ts": 11.722283, + "stddev_ts": 0.003253, + "samples_ns": [ + 10920079175, + 10921991374, + 10916056238 + ], + "samples_ts": [ + 11.7215, + 11.7195, + 11.7258 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 644 + }, + { + "timestamp_utc": "2025-12-09T09:21:47.838154+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:19:21Z\",\n \"avg_ns\": 3245884271,\n \"stddev_ns\": 285394,\n \"avg_ts\": 39.434555,\n \"stddev_ts\": 0.003326,\n \"samples_ns\": [ 3246179725, 3245833956, 3245639134 ],\n \"samples_ts\": [ 39.431, 39.4352, 39.4375 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:19:34Z\",\n \"avg_ns\": 44399322526,\n \"stddev_ns\": 7315330,\n \"avg_ts\": 11.531708,\n \"stddev_ts\": 0.001899,\n \"samples_ns\": [ 44403127533, 44390892305, 44403947741 ],\n \"samples_ts\": [ 11.5307, 11.5339, 11.5305 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T09:19:21Z", + "avg_ns": 3245884271, + "stddev_ns": 285394, + "avg_ts": 39.434555, + "stddev_ts": 0.003326, + "samples_ns": [ + 3246179725, + 3245833956, + 3245639134 + ], + "samples_ts": [ + 39.431, + 39.4352, + 39.4375 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T09:19:34Z", + "avg_ns": 44399322526, + "stddev_ns": 7315330, + "avg_ts": 11.531708, + "stddev_ts": 0.001899, + "samples_ns": [ + 44403127533, + 44390892305, + 44403947741 + ], + "samples_ts": [ + 11.5307, + 11.5339, + 11.5305 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 645 + }, + { + "timestamp_utc": "2025-12-09T09:23:15.686143+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:21:48Z\",\n \"avg_ns\": 13504038436,\n \"stddev_ns\": 341190,\n \"avg_ts\": 37.914584,\n \"stddev_ts\": 0.000840,\n \"samples_ns\": [ 13504121704, 13504286980, 13503706626 ],\n \"samples_ts\": [ 37.9144, 37.9139, 37.9155 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:22:42Z\",\n \"avg_ns\": 10921457070,\n \"stddev_ns\": 2295922,\n \"avg_ts\": 11.720048,\n \"stddev_ts\": 0.002461,\n \"samples_ns\": [ 10919408941, 10923935168, 10921027102 ],\n \"samples_ts\": [ 11.7222, 11.7174, 11.7205 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T09:21:48Z", + "avg_ns": 13504038436, + "stddev_ns": 341190, + "avg_ts": 37.914584, + "stddev_ts": 0.00084, + "samples_ns": [ + 13504121704, + 13504286980, + 13503706626 + ], + "samples_ts": [ + 37.9144, + 37.9139, + 37.9155 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T09:22:42Z", + "avg_ns": 10921457070, + "stddev_ns": 2295922, + "avg_ts": 11.720048, + "stddev_ts": 0.002461, + "samples_ns": [ + 10919408941, + 10923935168, + 10921027102 + ], + "samples_ts": [ + 11.7222, + 11.7174, + 11.7205 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 646 + }, + { + "timestamp_utc": "2025-12-09T09:26:24.145185+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:23:16Z\",\n \"avg_ns\": 13500452506,\n \"stddev_ns\": 919000,\n \"avg_ts\": 37.924655,\n \"stddev_ts\": 0.002582,\n \"samples_ns\": [ 13499817742, 13501506344, 13500033432 ],\n \"samples_ts\": [ 37.9264, 37.9217, 37.9258 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:24:10Z\",\n \"avg_ns\": 44446786940,\n \"stddev_ns\": 4466344,\n \"avg_ts\": 11.519393,\n \"stddev_ts\": 0.001157,\n \"samples_ns\": [ 44444872517, 44451891376, 44443596927 ],\n \"samples_ts\": [ 11.5199, 11.5181, 11.5202 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T09:23:16Z", + "avg_ns": 13500452506, + "stddev_ns": 919000, + "avg_ts": 37.924655, + "stddev_ts": 0.002582, + "samples_ns": [ + 13499817742, + 13501506344, + 13500033432 + ], + "samples_ts": [ + 37.9264, + 37.9217, + 37.9258 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T09:24:10Z", + "avg_ns": 44446786940, + "stddev_ns": 4466344, + "avg_ts": 11.519393, + "stddev_ts": 0.001157, + "samples_ns": [ + 44444872517, + 44451891376, + 44443596927 + ], + "samples_ts": [ + 11.5199, + 11.5181, + 11.5202 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 647 + }, + { + "timestamp_utc": "2025-12-09T09:27:09.212725+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:26:25Z\",\n \"avg_ns\": 2253946075,\n \"stddev_ns\": 1024502,\n \"avg_ts\": 56.789299,\n \"stddev_ts\": 0.025756,\n \"samples_ns\": [ 2253890975, 2252952440, 2254994812 ],\n \"samples_ts\": [ 56.7907, 56.8143, 56.7629 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:26:34Z\",\n \"avg_ns\": 11660510557,\n \"stddev_ns\": 1466421,\n \"avg_ts\": 10.977221,\n \"stddev_ts\": 0.001380,\n \"samples_ns\": [ 11659435977, 11659914555, 11662181139 ],\n \"samples_ts\": [ 10.9782, 10.9778, 10.9756 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T09:26:25Z", + "avg_ns": 2253946075, + "stddev_ns": 1024502, + "avg_ts": 56.789299, + "stddev_ts": 0.025756, + "samples_ns": [ + 2253890975, + 2252952440, + 2254994812 + ], + "samples_ts": [ + 56.7907, + 56.8143, + 56.7629 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T09:26:34Z", + "avg_ns": 11660510557, + "stddev_ns": 1466421, + "avg_ts": 10.977221, + "stddev_ts": 0.00138, + "samples_ns": [ + 11659435977, + 11659914555, + 11662181139 + ], + "samples_ts": [ + 10.9782, + 10.9778, + 10.9756 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 648 + }, + { + "timestamp_utc": "2025-12-09T09:29:40.300875+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:27:10Z\",\n \"avg_ns\": 2258811828,\n \"stddev_ns\": 496582,\n \"avg_ts\": 56.666962,\n \"stddev_ts\": 0.012344,\n \"samples_ns\": [ 2258256302, 2259192642, 2258986542 ],\n \"samples_ts\": [ 56.6809, 56.6574, 56.6626 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:27:19Z\",\n \"avg_ns\": 46994651182,\n \"stddev_ns\": 4096619,\n \"avg_ts\": 10.894857,\n \"stddev_ts\": 0.000947,\n \"samples_ns\": [ 46998994940, 46990886466, 46994072142 ],\n \"samples_ts\": [ 10.8938, 10.8957, 10.895 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T09:27:10Z", + "avg_ns": 2258811828, + "stddev_ns": 496582, + "avg_ts": 56.666962, + "stddev_ts": 0.012344, + "samples_ns": [ + 2258256302, + 2259192642, + 2258986542 + ], + "samples_ts": [ + 56.6809, + 56.6574, + 56.6626 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T09:27:19Z", + "avg_ns": 46994651182, + "stddev_ns": 4096619, + "avg_ts": 10.894857, + "stddev_ts": 0.000947, + "samples_ns": [ + 46998994940, + 46990886466, + 46994072142 + ], + "samples_ts": [ + 10.8938, + 10.8957, + 10.895 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 649 + }, + { + "timestamp_utc": "2025-12-09T09:30:52.756800+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:29:41Z\",\n \"avg_ns\": 9068712258,\n \"stddev_ns\": 2105710,\n \"avg_ts\": 56.457852,\n \"stddev_ts\": 0.013082,\n \"samples_ns\": [ 9068567677, 9070882215, 9066686884 ],\n \"samples_ts\": [ 56.4588, 56.4443, 56.4705 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:30:17Z\",\n \"avg_ns\": 11694934021,\n \"stddev_ns\": 2532102,\n \"avg_ts\": 10.944910,\n \"stddev_ts\": 0.002367,\n \"samples_ns\": [ 11694021971, 11692986721, 11697793372 ],\n \"samples_ts\": [ 10.9458, 10.9467, 10.9422 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T09:29:41Z", + "avg_ns": 9068712258, + "stddev_ns": 2105710, + "avg_ts": 56.457852, + "stddev_ts": 0.013082, + "samples_ns": [ + 9068567677, + 9070882215, + 9066686884 + ], + "samples_ts": [ + 56.4588, + 56.4443, + 56.4705 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T09:30:17Z", + "avg_ns": 11694934021, + "stddev_ns": 2532102, + "avg_ts": 10.94491, + "stddev_ts": 0.002367, + "samples_ns": [ + 11694021971, + 11692986721, + 11697793372 + ], + "samples_ts": [ + 10.9458, + 10.9467, + 10.9422 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 650 + }, + { + "timestamp_utc": "2025-12-09T09:33:51.367079+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:30:53Z\",\n \"avg_ns\": 9077567783,\n \"stddev_ns\": 678920,\n \"avg_ts\": 56.402774,\n \"stddev_ts\": 0.004218,\n \"samples_ns\": [ 9078277867, 9077500437, 9076925045 ],\n \"samples_ts\": [ 56.3984, 56.4032, 56.4068 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:31:29Z\",\n \"avg_ns\": 47079245562,\n \"stddev_ns\": 9430764,\n \"avg_ts\": 10.875281,\n \"stddev_ts\": 0.002178,\n \"samples_ns\": [ 47085358969, 47083987248, 47068390471 ],\n \"samples_ts\": [ 10.8739, 10.8742, 10.8778 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T09:30:53Z", + "avg_ns": 9077567783, + "stddev_ns": 678920, + "avg_ts": 56.402774, + "stddev_ts": 0.004218, + "samples_ns": [ + 9078277867, + 9077500437, + 9076925045 + ], + "samples_ts": [ + 56.3984, + 56.4032, + 56.4068 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T09:31:29Z", + "avg_ns": 47079245562, + "stddev_ns": 9430764, + "avg_ts": 10.875281, + "stddev_ts": 0.002178, + "samples_ns": [ + 47085358969, + 47083987248, + 47068390471 + ], + "samples_ts": [ + 10.8739, + 10.8742, + 10.8778 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 651 + }, + { + "timestamp_utc": "2025-12-09T09:34:36.500587+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:33:52Z\",\n \"avg_ns\": 2258972555,\n \"stddev_ns\": 1107517,\n \"avg_ts\": 56.662937,\n \"stddev_ts\": 0.027779,\n \"samples_ns\": [ 2257900833, 2260112696, 2258904136 ],\n \"samples_ts\": [ 56.6898, 56.6343, 56.6646 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:34:01Z\",\n \"avg_ns\": 11675569045,\n \"stddev_ns\": 2651482,\n \"avg_ts\": 10.963063,\n \"stddev_ts\": 0.002486,\n \"samples_ns\": [ 11675492619, 11678253507, 11672961011 ],\n \"samples_ts\": [ 10.9631, 10.9605, 10.9655 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T09:33:52Z", + "avg_ns": 2258972555, + "stddev_ns": 1107517, + "avg_ts": 56.662937, + "stddev_ts": 0.027779, + "samples_ns": [ + 2257900833, + 2260112696, + 2258904136 + ], + "samples_ts": [ + 56.6898, + 56.6343, + 56.6646 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T09:34:01Z", + "avg_ns": 11675569045, + "stddev_ns": 2651482, + "avg_ts": 10.963063, + "stddev_ts": 0.002486, + "samples_ns": [ + 11675492619, + 11678253507, + 11672961011 + ], + "samples_ts": [ + 10.9631, + 10.9605, + 10.9655 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 652 + }, + { + "timestamp_utc": "2025-12-09T09:37:07.549755+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:34:37Z\",\n \"avg_ns\": 2255199274,\n \"stddev_ns\": 2183362,\n \"avg_ts\": 56.757769,\n \"stddev_ts\": 0.054979,\n \"samples_ns\": [ 2252691993, 2256681454, 2256224375 ],\n \"samples_ts\": [ 56.8209, 56.7205, 56.7319 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:34:46Z\",\n \"avg_ns\": 46991701515,\n \"stddev_ns\": 4287042,\n \"avg_ts\": 10.895541,\n \"stddev_ts\": 0.000993,\n \"samples_ns\": [ 46994967297, 46993283083, 46986854166 ],\n \"samples_ts\": [ 10.8948, 10.8952, 10.8967 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T09:34:37Z", + "avg_ns": 2255199274, + "stddev_ns": 2183362, + "avg_ts": 56.757769, + "stddev_ts": 0.054979, + "samples_ns": [ + 2252691993, + 2256681454, + 2256224375 + ], + "samples_ts": [ + 56.8209, + 56.7205, + 56.7319 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T09:34:46Z", + "avg_ns": 46991701515, + "stddev_ns": 4287042, + "avg_ts": 10.895541, + "stddev_ts": 0.000993, + "samples_ns": [ + 46994967297, + 46993283083, + 46986854166 + ], + "samples_ts": [ + 10.8948, + 10.8952, + 10.8967 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 653 + }, + { + "timestamp_utc": "2025-12-09T09:38:19.863358+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:37:08Z\",\n \"avg_ns\": 9049818173,\n \"stddev_ns\": 1356482,\n \"avg_ts\": 56.575723,\n \"stddev_ts\": 0.008480,\n \"samples_ns\": [ 9049426698, 9048700478, 9051327343 ],\n \"samples_ts\": [ 56.5782, 56.5827, 56.5663 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:37:44Z\",\n \"avg_ns\": 11672725405,\n \"stddev_ns\": 5889859,\n \"avg_ts\": 10.965736,\n \"stddev_ts\": 0.005534,\n \"samples_ns\": [ 11677751960, 11674179589, 11666244666 ],\n \"samples_ts\": [ 10.961, 10.9644, 10.9718 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T09:37:08Z", + "avg_ns": 9049818173, + "stddev_ns": 1356482, + "avg_ts": 56.575723, + "stddev_ts": 0.00848, + "samples_ns": [ + 9049426698, + 9048700478, + 9051327343 + ], + "samples_ts": [ + 56.5782, + 56.5827, + 56.5663 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T09:37:44Z", + "avg_ns": 11672725405, + "stddev_ns": 5889859, + "avg_ts": 10.965736, + "stddev_ts": 0.005534, + "samples_ns": [ + 11677751960, + 11674179589, + 11666244666 + ], + "samples_ts": [ + 10.961, + 10.9644, + 10.9718 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 654 + }, + { + "timestamp_utc": "2025-12-09T09:41:18.399707+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:38:20Z\",\n \"avg_ns\": 9043831769,\n \"stddev_ns\": 1899344,\n \"avg_ts\": 56.613173,\n \"stddev_ts\": 0.011859,\n \"samples_ns\": [ 9043609551, 9042058102, 9045827656 ],\n \"samples_ts\": [ 56.6146, 56.6243, 56.6007 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:38:56Z\",\n \"avg_ns\": 47095510261,\n \"stddev_ns\": 1257635,\n \"avg_ts\": 10.871525,\n \"stddev_ts\": 0.000286,\n \"samples_ns\": [ 47095069077, 47094552478, 47096909229 ],\n \"samples_ts\": [ 10.8716, 10.8717, 10.8712 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T09:38:20Z", + "avg_ns": 9043831769, + "stddev_ns": 1899344, + "avg_ts": 56.613173, + "stddev_ts": 0.011859, + "samples_ns": [ + 9043609551, + 9042058102, + 9045827656 + ], + "samples_ts": [ + 56.6146, + 56.6243, + 56.6007 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T09:38:56Z", + "avg_ns": 47095510261, + "stddev_ns": 1257635, + "avg_ts": 10.871525, + "stddev_ts": 0.000286, + "samples_ns": [ + 47095069077, + 47094552478, + 47096909229 + ], + "samples_ts": [ + 10.8716, + 10.8717, + 10.8712 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 655 + }, + { + "timestamp_utc": "2025-12-09T09:42:03.521015+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:41:19Z\",\n \"avg_ns\": 2255625843,\n \"stddev_ns\": 972411,\n \"avg_ts\": 56.747007,\n \"stddev_ts\": 0.024469,\n \"samples_ns\": [ 2256354696, 2256001122, 2254521711 ],\n \"samples_ts\": [ 56.7287, 56.7376, 56.7748 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:41:28Z\",\n \"avg_ns\": 11676996046,\n \"stddev_ns\": 6377441,\n \"avg_ts\": 10.961726,\n \"stddev_ts\": 0.005985,\n \"samples_ns\": [ 11673024021, 11673611861, 11684352256 ],\n \"samples_ts\": [ 10.9655, 10.9649, 10.9548 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T09:41:19Z", + "avg_ns": 2255625843, + "stddev_ns": 972411, + "avg_ts": 56.747007, + "stddev_ts": 0.024469, + "samples_ns": [ + 2256354696, + 2256001122, + 2254521711 + ], + "samples_ts": [ + 56.7287, + 56.7376, + 56.7748 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T09:41:28Z", + "avg_ns": 11676996046, + "stddev_ns": 6377441, + "avg_ts": 10.961726, + "stddev_ts": 0.005985, + "samples_ns": [ + 11673024021, + 11673611861, + 11684352256 + ], + "samples_ts": [ + 10.9655, + 10.9649, + 10.9548 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 656 + }, + { + "timestamp_utc": "2025-12-09T09:44:34.640086+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:42:04Z\",\n \"avg_ns\": 2253646556,\n \"stddev_ns\": 1515310,\n \"avg_ts\": 56.796856,\n \"stddev_ts\": 0.038158,\n \"samples_ns\": [ 2255365560, 2253065771, 2252508338 ],\n \"samples_ts\": [ 56.7535, 56.8115, 56.8255 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:42:13Z\",\n \"avg_ns\": 47014864051,\n \"stddev_ns\": 5195054,\n \"avg_ts\": 10.890173,\n \"stddev_ts\": 0.001201,\n \"samples_ns\": [ 47013709988, 47010352305, 47020529862 ],\n \"samples_ts\": [ 10.8904, 10.8912, 10.8889 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T09:42:04Z", + "avg_ns": 2253646556, + "stddev_ns": 1515310, + "avg_ts": 56.796856, + "stddev_ts": 0.038158, + "samples_ns": [ + 2255365560, + 2253065771, + 2252508338 + ], + "samples_ts": [ + 56.7535, + 56.8115, + 56.8255 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T09:42:13Z", + "avg_ns": 47014864051, + "stddev_ns": 5195054, + "avg_ts": 10.890173, + "stddev_ts": 0.001201, + "samples_ns": [ + 47013709988, + 47010352305, + 47020529862 + ], + "samples_ts": [ + 10.8904, + 10.8912, + 10.8889 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 657 + }, + { + "timestamp_utc": "2025-12-09T09:45:48.384046+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:44:35Z\",\n \"avg_ns\": 9393076613,\n \"stddev_ns\": 3271555,\n \"avg_ts\": 54.508236,\n \"stddev_ts\": 0.018972,\n \"samples_ns\": [ 9389304878, 9394840842, 9395084121 ],\n \"samples_ts\": [ 54.5301, 54.498, 54.4966 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:45:13Z\",\n \"avg_ns\": 11666979448,\n \"stddev_ns\": 2067549,\n \"avg_ts\": 10.971135,\n \"stddev_ts\": 0.001944,\n \"samples_ns\": [ 11667632273, 11668641785, 11664664286 ],\n \"samples_ts\": [ 10.9705, 10.9696, 10.9733 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T09:44:35Z", + "avg_ns": 9393076613, + "stddev_ns": 3271555, + "avg_ts": 54.508236, + "stddev_ts": 0.018972, + "samples_ns": [ + 9389304878, + 9394840842, + 9395084121 + ], + "samples_ts": [ + 54.5301, + 54.498, + 54.4966 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T09:45:13Z", + "avg_ns": 11666979448, + "stddev_ns": 2067549, + "avg_ts": 10.971135, + "stddev_ts": 0.001944, + "samples_ns": [ + 11667632273, + 11668641785, + 11664664286 + ], + "samples_ts": [ + 10.9705, + 10.9696, + 10.9733 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 658 + }, + { + "timestamp_utc": "2025-12-09T09:48:48.144243+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:45:49Z\",\n \"avg_ns\": 9396676832,\n \"stddev_ns\": 3469410,\n \"avg_ts\": 54.487353,\n \"stddev_ts\": 0.020102,\n \"samples_ns\": [ 9400055231, 9396847159, 9393128108 ],\n \"samples_ts\": [ 54.4678, 54.4864, 54.5079 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:46:26Z\",\n \"avg_ns\": 47034020032,\n \"stddev_ns\": 3375722,\n \"avg_ts\": 10.885738,\n \"stddev_ts\": 0.000781,\n \"samples_ns\": [ 47037884304, 47031645118, 47032530674 ],\n \"samples_ts\": [ 10.8848, 10.8863, 10.8861 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T09:45:49Z", + "avg_ns": 9396676832, + "stddev_ns": 3469410, + "avg_ts": 54.487353, + "stddev_ts": 0.020102, + "samples_ns": [ + 9400055231, + 9396847159, + 9393128108 + ], + "samples_ts": [ + 54.4678, + 54.4864, + 54.5079 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T09:46:26Z", + "avg_ns": 47034020032, + "stddev_ns": 3375722, + "avg_ts": 10.885738, + "stddev_ts": 0.000781, + "samples_ns": [ + 47037884304, + 47031645118, + 47032530674 + ], + "samples_ts": [ + 10.8848, + 10.8863, + 10.8861 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 659 + }, + { + "timestamp_utc": "2025-12-09T09:49:33.322692+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:48:49Z\",\n \"avg_ns\": 2252956733,\n \"stddev_ns\": 375580,\n \"avg_ts\": 56.814230,\n \"stddev_ts\": 0.009470,\n \"samples_ns\": [ 2252708517, 2253388822, 2252772860 ],\n \"samples_ts\": [ 56.8205, 56.8033, 56.8189 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:48:58Z\",\n \"avg_ns\": 11701107920,\n \"stddev_ns\": 3094105,\n \"avg_ts\": 10.939136,\n \"stddev_ts\": 0.002893,\n \"samples_ns\": [ 11697924307, 11704103987, 11701295466 ],\n \"samples_ts\": [ 10.9421, 10.9363, 10.939 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T09:48:49Z", + "avg_ns": 2252956733, + "stddev_ns": 375580, + "avg_ts": 56.81423, + "stddev_ts": 0.00947, + "samples_ns": [ + 2252708517, + 2253388822, + 2252772860 + ], + "samples_ts": [ + 56.8205, + 56.8033, + 56.8189 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T09:48:58Z", + "avg_ns": 11701107920, + "stddev_ns": 3094105, + "avg_ts": 10.939136, + "stddev_ts": 0.002893, + "samples_ns": [ + 11697924307, + 11704103987, + 11701295466 + ], + "samples_ts": [ + 10.9421, + 10.9363, + 10.939 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 660 + }, + { + "timestamp_utc": "2025-12-09T09:52:04.432601+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:49:34Z\",\n \"avg_ns\": 2254717366,\n \"stddev_ns\": 1614972,\n \"avg_ts\": 56.769884,\n \"stddev_ts\": 0.040675,\n \"samples_ns\": [ 2255994483, 2255255610, 2252902005 ],\n \"samples_ts\": [ 56.7377, 56.7563, 56.8156 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:49:43Z\",\n \"avg_ns\": 47008115715,\n \"stddev_ns\": 9629678,\n \"avg_ts\": 10.891737,\n \"stddev_ts\": 0.002231,\n \"samples_ns\": [ 47006704621, 47018373085, 46999269439 ],\n \"samples_ts\": [ 10.8921, 10.8894, 10.8938 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T09:49:34Z", + "avg_ns": 2254717366, + "stddev_ns": 1614972, + "avg_ts": 56.769884, + "stddev_ts": 0.040675, + "samples_ns": [ + 2255994483, + 2255255610, + 2252902005 + ], + "samples_ts": [ + 56.7377, + 56.7563, + 56.8156 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T09:49:43Z", + "avg_ns": 47008115715, + "stddev_ns": 9629678, + "avg_ts": 10.891737, + "stddev_ts": 0.002231, + "samples_ns": [ + 47006704621, + 47018373085, + 46999269439 + ], + "samples_ts": [ + 10.8921, + 10.8894, + 10.8938 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 661 + }, + { + "timestamp_utc": "2025-12-09T09:53:16.893350+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:52:05Z\",\n \"avg_ns\": 9076239978,\n \"stddev_ns\": 3130119,\n \"avg_ts\": 56.411029,\n \"stddev_ts\": 0.019433,\n \"samples_ns\": [ 9079807763, 9074938458, 9073973715 ],\n \"samples_ts\": [ 56.3889, 56.4191, 56.4251 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:52:41Z\",\n \"avg_ns\": 11696711631,\n \"stddev_ns\": 1463703,\n \"avg_ts\": 10.943247,\n \"stddev_ts\": 0.001362,\n \"samples_ns\": [ 11698392516, 11695873314, 11695869065 ],\n \"samples_ts\": [ 10.9417, 10.944, 10.944 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T09:52:05Z", + "avg_ns": 9076239978, + "stddev_ns": 3130119, + "avg_ts": 56.411029, + "stddev_ts": 0.019433, + "samples_ns": [ + 9079807763, + 9074938458, + 9073973715 + ], + "samples_ts": [ + 56.3889, + 56.4191, + 56.4251 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T09:52:41Z", + "avg_ns": 11696711631, + "stddev_ns": 1463703, + "avg_ts": 10.943247, + "stddev_ts": 0.001362, + "samples_ns": [ + 11698392516, + 11695873314, + 11695869065 + ], + "samples_ts": [ + 10.9417, + 10.944, + 10.944 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 662 + }, + { + "timestamp_utc": "2025-12-09T09:56:16.066986+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:53:17Z\",\n \"avg_ns\": 9060441913,\n \"stddev_ns\": 1558750,\n \"avg_ts\": 56.509386,\n \"stddev_ts\": 0.009686,\n \"samples_ns\": [ 9060868345, 9058720322, 9061737074 ],\n \"samples_ts\": [ 56.5067, 56.5201, 56.5013 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:53:54Z\",\n \"avg_ns\": 47281856686,\n \"stddev_ns\": 5440296,\n \"avg_ts\": 10.828678,\n \"stddev_ts\": 0.001244,\n \"samples_ns\": [ 47281689582, 47276510569, 47287369909 ],\n \"samples_ts\": [ 10.8287, 10.8299, 10.8274 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T09:53:17Z", + "avg_ns": 9060441913, + "stddev_ns": 1558750, + "avg_ts": 56.509386, + "stddev_ts": 0.009686, + "samples_ns": [ + 9060868345, + 9058720322, + 9061737074 + ], + "samples_ts": [ + 56.5067, + 56.5201, + 56.5013 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T09:53:54Z", + "avg_ns": 47281856686, + "stddev_ns": 5440296, + "avg_ts": 10.828678, + "stddev_ts": 0.001244, + "samples_ns": [ + 47281689582, + 47276510569, + 47287369909 + ], + "samples_ts": [ + 10.8287, + 10.8299, + 10.8274 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 663 + }, + { + "timestamp_utc": "2025-12-09T09:57:01.145750+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:56:16Z\",\n \"avg_ns\": 2252352584,\n \"stddev_ns\": 348786,\n \"avg_ts\": 56.829469,\n \"stddev_ts\": 0.008719,\n \"samples_ns\": [ 2251983190, 2252406665, 2252667898 ],\n \"samples_ts\": [ 56.8388, 56.8281, 56.8215 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:56:25Z\",\n \"avg_ns\": 11666410130,\n \"stddev_ns\": 3483842,\n \"avg_ts\": 10.971670,\n \"stddev_ts\": 0.003277,\n \"samples_ns\": [ 11669728933, 11666719566, 11662781891 ],\n \"samples_ts\": [ 10.9685, 10.9714, 10.9751 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T09:56:16Z", + "avg_ns": 2252352584, + "stddev_ns": 348786, + "avg_ts": 56.829469, + "stddev_ts": 0.008719, + "samples_ns": [ + 2251983190, + 2252406665, + 2252667898 + ], + "samples_ts": [ + 56.8388, + 56.8281, + 56.8215 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T09:56:25Z", + "avg_ns": 11666410130, + "stddev_ns": 3483842, + "avg_ts": 10.97167, + "stddev_ts": 0.003277, + "samples_ns": [ + 11669728933, + 11666719566, + 11662781891 + ], + "samples_ts": [ + 10.9685, + 10.9714, + 10.9751 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 664 + }, + { + "timestamp_utc": "2025-12-09T09:59:32.595965+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:57:02Z\",\n \"avg_ns\": 2253780359,\n \"stddev_ns\": 823164,\n \"avg_ts\": 56.793472,\n \"stddev_ts\": 0.020746,\n \"samples_ns\": [ 2254023130, 2252863111, 2254454836 ],\n \"samples_ts\": [ 56.7873, 56.8166, 56.7765 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:57:11Z\",\n \"avg_ns\": 47122209373,\n \"stddev_ns\": 93928951,\n \"avg_ts\": 10.865394,\n \"stddev_ts\": 0.021644,\n \"samples_ns\": [ 47041257738, 47100173011, 47225197370 ],\n \"samples_ts\": [ 10.8841, 10.8704, 10.8417 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T09:57:02Z", + "avg_ns": 2253780359, + "stddev_ns": 823164, + "avg_ts": 56.793472, + "stddev_ts": 0.020746, + "samples_ns": [ + 2254023130, + 2252863111, + 2254454836 + ], + "samples_ts": [ + 56.7873, + 56.8166, + 56.7765 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T09:57:11Z", + "avg_ns": 47122209373, + "stddev_ns": 93928951, + "avg_ts": 10.865394, + "stddev_ts": 0.021644, + "samples_ns": [ + 47041257738, + 47100173011, + 47225197370 + ], + "samples_ts": [ + 10.8841, + 10.8704, + 10.8417 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 665 + }, + { + "timestamp_utc": "2025-12-09T10:00:44.940066+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T09:59:33Z\",\n \"avg_ns\": 9051553549,\n \"stddev_ns\": 1872225,\n \"avg_ts\": 56.564877,\n \"stddev_ts\": 0.011683,\n \"samples_ns\": [ 9053700736, 9050675833, 9050284079 ],\n \"samples_ts\": [ 56.5515, 56.5704, 56.5728 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:00:09Z\",\n \"avg_ns\": 11682671668,\n \"stddev_ns\": 1634528,\n \"avg_ts\": 10.956398,\n \"stddev_ts\": 0.001530,\n \"samples_ns\": [ 11683708232, 11683515059, 11680791714 ],\n \"samples_ts\": [ 10.9554, 10.9556, 10.9582 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T09:59:33Z", + "avg_ns": 9051553549, + "stddev_ns": 1872225, + "avg_ts": 56.564877, + "stddev_ts": 0.011683, + "samples_ns": [ + 9053700736, + 9050675833, + 9050284079 + ], + "samples_ts": [ + 56.5515, + 56.5704, + 56.5728 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T10:00:09Z", + "avg_ns": 11682671668, + "stddev_ns": 1634528, + "avg_ts": 10.956398, + "stddev_ts": 0.00153, + "samples_ns": [ + 11683708232, + 11683515059, + 11680791714 + ], + "samples_ts": [ + 10.9554, + 10.9556, + 10.9582 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 666 + }, + { + "timestamp_utc": "2025-12-09T10:03:43.454801+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:00:45Z\",\n \"avg_ns\": 9043234162,\n \"stddev_ns\": 1683786,\n \"avg_ts\": 56.616914,\n \"stddev_ts\": 0.010524,\n \"samples_ns\": [ 9042437845, 9045165456, 9042099186 ],\n \"samples_ts\": [ 56.6219, 56.6048, 56.624 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:01:22Z\",\n \"avg_ns\": 47088553426,\n \"stddev_ns\": 6893596,\n \"avg_ts\": 10.873131,\n \"stddev_ts\": 0.001592,\n \"samples_ns\": [ 47081786439, 47088306825, 47095567014 ],\n \"samples_ts\": [ 10.8747, 10.8732, 10.8715 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T10:00:45Z", + "avg_ns": 9043234162, + "stddev_ns": 1683786, + "avg_ts": 56.616914, + "stddev_ts": 0.010524, + "samples_ns": [ + 9042437845, + 9045165456, + 9042099186 + ], + "samples_ts": [ + 56.6219, + 56.6048, + 56.624 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T10:01:22Z", + "avg_ns": 47088553426, + "stddev_ns": 6893596, + "avg_ts": 10.873131, + "stddev_ts": 0.001592, + "samples_ns": [ + 47081786439, + 47088306825, + 47095567014 + ], + "samples_ts": [ + 10.8747, + 10.8732, + 10.8715 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 667 + }, + { + "timestamp_utc": "2025-12-09T10:04:28.676826+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:03:44Z\",\n \"avg_ns\": 2259305583,\n \"stddev_ns\": 599743,\n \"avg_ts\": 56.654579,\n \"stddev_ts\": 0.014992,\n \"samples_ns\": [ 2258708125, 2259304787, 2259903838 ],\n \"samples_ts\": [ 56.6696, 56.6546, 56.6396 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:03:53Z\",\n \"avg_ns\": 11705143787,\n \"stddev_ns\": 4780239,\n \"avg_ts\": 10.935365,\n \"stddev_ts\": 0.004464,\n \"samples_ns\": [ 11710602436, 11703115430, 11701713496 ],\n \"samples_ts\": [ 10.9303, 10.9373, 10.9386 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T10:03:44Z", + "avg_ns": 2259305583, + "stddev_ns": 599743, + "avg_ts": 56.654579, + "stddev_ts": 0.014992, + "samples_ns": [ + 2258708125, + 2259304787, + 2259903838 + ], + "samples_ts": [ + 56.6696, + 56.6546, + 56.6396 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T10:03:53Z", + "avg_ns": 11705143787, + "stddev_ns": 4780239, + "avg_ts": 10.935365, + "stddev_ts": 0.004464, + "samples_ns": [ + 11710602436, + 11703115430, + 11701713496 + ], + "samples_ts": [ + 10.9303, + 10.9373, + 10.9386 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 668 + }, + { + "timestamp_utc": "2025-12-09T10:07:00.274656+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:04:29Z\",\n \"avg_ns\": 2253455688,\n \"stddev_ns\": 191690,\n \"avg_ts\": 56.801650,\n \"stddev_ts\": 0.004832,\n \"samples_ns\": [ 2253256646, 2253639066, 2253471352 ],\n \"samples_ts\": [ 56.8067, 56.797, 56.8013 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:04:38Z\",\n \"avg_ns\": 47150248942,\n \"stddev_ns\": 7650567,\n \"avg_ts\": 10.858904,\n \"stddev_ts\": 0.001762,\n \"samples_ns\": [ 47154505807, 47154824278, 47141416741 ],\n \"samples_ts\": [ 10.8579, 10.8578, 10.8609 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T10:04:29Z", + "avg_ns": 2253455688, + "stddev_ns": 191690, + "avg_ts": 56.80165, + "stddev_ts": 0.004832, + "samples_ns": [ + 2253256646, + 2253639066, + 2253471352 + ], + "samples_ts": [ + 56.8067, + 56.797, + 56.8013 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T10:04:38Z", + "avg_ns": 47150248942, + "stddev_ns": 7650567, + "avg_ts": 10.858904, + "stddev_ts": 0.001762, + "samples_ns": [ + 47154505807, + 47154824278, + 47141416741 + ], + "samples_ts": [ + 10.8579, + 10.8578, + 10.8609 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 669 + }, + { + "timestamp_utc": "2025-12-09T10:08:14.071574+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:07:01Z\",\n \"avg_ns\": 9393453277,\n \"stddev_ns\": 608361,\n \"avg_ts\": 54.506047,\n \"stddev_ts\": 0.003530,\n \"samples_ns\": [ 9392814713, 9393519022, 9394026096 ],\n \"samples_ts\": [ 54.5098, 54.5057, 54.5027 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:07:38Z\",\n \"avg_ns\": 11709244585,\n \"stddev_ns\": 3061843,\n \"avg_ts\": 10.931534,\n \"stddev_ts\": 0.002857,\n \"samples_ns\": [ 11712073100, 11709664123, 11705996533 ],\n \"samples_ts\": [ 10.9289, 10.9311, 10.9346 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T10:07:01Z", + "avg_ns": 9393453277, + "stddev_ns": 608361, + "avg_ts": 54.506047, + "stddev_ts": 0.00353, + "samples_ns": [ + 9392814713, + 9393519022, + 9394026096 + ], + "samples_ts": [ + 54.5098, + 54.5057, + 54.5027 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T10:07:38Z", + "avg_ns": 11709244585, + "stddev_ns": 3061843, + "avg_ts": 10.931534, + "stddev_ts": 0.002857, + "samples_ns": [ + 11712073100, + 11709664123, + 11705996533 + ], + "samples_ts": [ + 10.9289, + 10.9311, + 10.9346 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 670 + }, + { + "timestamp_utc": "2025-12-09T10:11:14.300591+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:08:14Z\",\n \"avg_ns\": 9396905485,\n \"stddev_ns\": 831699,\n \"avg_ts\": 54.486022,\n \"stddev_ts\": 0.004789,\n \"samples_ns\": [ 9396193010, 9396712531, 9397810915 ],\n \"samples_ts\": [ 54.4902, 54.4871, 54.4808 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:08:52Z\",\n \"avg_ns\": 47183084222,\n \"stddev_ns\": 10716996,\n \"avg_ts\": 10.851347,\n \"stddev_ts\": 0.002464,\n \"samples_ns\": [ 47195271369, 47175143510, 47178837788 ],\n \"samples_ts\": [ 10.8485, 10.8532, 10.8523 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T10:08:14Z", + "avg_ns": 9396905485, + "stddev_ns": 831699, + "avg_ts": 54.486022, + "stddev_ts": 0.004789, + "samples_ns": [ + 9396193010, + 9396712531, + 9397810915 + ], + "samples_ts": [ + 54.4902, + 54.4871, + 54.4808 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T10:08:52Z", + "avg_ns": 47183084222, + "stddev_ns": 10716996, + "avg_ts": 10.851347, + "stddev_ts": 0.002464, + "samples_ns": [ + 47195271369, + 47175143510, + 47178837788 + ], + "samples_ts": [ + 10.8485, + 10.8532, + 10.8523 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 671 + }, + { + "timestamp_utc": "2025-12-09T10:11:59.529810+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:11:15Z\",\n \"avg_ns\": 2255477709,\n \"stddev_ns\": 997369,\n \"avg_ts\": 56.750735,\n \"stddev_ts\": 0.025089,\n \"samples_ns\": [ 2255066810, 2256614887, 2254751430 ],\n \"samples_ts\": [ 56.7611, 56.7221, 56.769 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:11:24Z\",\n \"avg_ns\": 11706751973,\n \"stddev_ns\": 3011172,\n \"avg_ts\": 10.933862,\n \"stddev_ts\": 0.002811,\n \"samples_ns\": [ 11709802248, 11703785545, 11706668127 ],\n \"samples_ts\": [ 10.931, 10.9366, 10.9339 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T10:11:15Z", + "avg_ns": 2255477709, + "stddev_ns": 997369, + "avg_ts": 56.750735, + "stddev_ts": 0.025089, + "samples_ns": [ + 2255066810, + 2256614887, + 2254751430 + ], + "samples_ts": [ + 56.7611, + 56.7221, + 56.769 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T10:11:24Z", + "avg_ns": 11706751973, + "stddev_ns": 3011172, + "avg_ts": 10.933862, + "stddev_ts": 0.002811, + "samples_ns": [ + 11709802248, + 11703785545, + 11706668127 + ], + "samples_ts": [ + 10.931, + 10.9366, + 10.9339 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 672 + }, + { + "timestamp_utc": "2025-12-09T10:14:30.931875+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:12:00Z\",\n \"avg_ns\": 2257966692,\n \"stddev_ns\": 713738,\n \"avg_ts\": 56.688174,\n \"stddev_ts\": 0.017838,\n \"samples_ns\": [ 2258720014, 2257308452, 2257871612 ],\n \"samples_ts\": [ 56.6693, 56.7047, 56.6906 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:12:09Z\",\n \"avg_ns\": 47099733641,\n \"stddev_ns\": 6254628,\n \"avg_ts\": 10.870550,\n \"stddev_ts\": 0.001444,\n \"samples_ns\": [ 47093220851, 47105693378, 47100286694 ],\n \"samples_ts\": [ 10.8721, 10.8692, 10.8704 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T10:12:00Z", + "avg_ns": 2257966692, + "stddev_ns": 713738, + "avg_ts": 56.688174, + "stddev_ts": 0.017838, + "samples_ns": [ + 2258720014, + 2257308452, + 2257871612 + ], + "samples_ts": [ + 56.6693, + 56.7047, + 56.6906 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T10:12:09Z", + "avg_ns": 47099733641, + "stddev_ns": 6254628, + "avg_ts": 10.87055, + "stddev_ts": 0.001444, + "samples_ns": [ + 47093220851, + 47105693378, + 47100286694 + ], + "samples_ts": [ + 10.8721, + 10.8692, + 10.8704 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 673 + }, + { + "timestamp_utc": "2025-12-09T10:15:43.492447+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:14:31Z\",\n \"avg_ns\": 9069022903,\n \"stddev_ns\": 2500708,\n \"avg_ts\": 56.455919,\n \"stddev_ts\": 0.015555,\n \"samples_ns\": [ 9068708095, 9066696330, 9071664285 ],\n \"samples_ts\": [ 56.4579, 56.4704, 56.4395 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:15:08Z\",\n \"avg_ns\": 11736583359,\n \"stddev_ns\": 2729237,\n \"avg_ts\": 10.906070,\n \"stddev_ts\": 0.002536,\n \"samples_ns\": [ 11739699161, 11735434815, 11734616101 ],\n \"samples_ts\": [ 10.9032, 10.9071, 10.9079 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T10:14:31Z", + "avg_ns": 9069022903, + "stddev_ns": 2500708, + "avg_ts": 56.455919, + "stddev_ts": 0.015555, + "samples_ns": [ + 9068708095, + 9066696330, + 9071664285 + ], + "samples_ts": [ + 56.4579, + 56.4704, + 56.4395 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T10:15:08Z", + "avg_ns": 11736583359, + "stddev_ns": 2729237, + "avg_ts": 10.90607, + "stddev_ts": 0.002536, + "samples_ns": [ + 11739699161, + 11735434815, + 11734616101 + ], + "samples_ts": [ + 10.9032, + 10.9071, + 10.9079 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 674 + }, + { + "timestamp_utc": "2025-12-09T10:18:42.088983+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:15:44Z\",\n \"avg_ns\": 9071227361,\n \"stddev_ns\": 3650991,\n \"avg_ts\": 56.442203,\n \"stddev_ts\": 0.022704,\n \"samples_ns\": [ 9069503003, 9075419797, 9068759284 ],\n \"samples_ts\": [ 56.4529, 56.4161, 56.4576 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:16:20Z\",\n \"avg_ns\": 47071893925,\n \"stddev_ns\": 11940619,\n \"avg_ts\": 10.876979,\n \"stddev_ts\": 0.002759,\n \"samples_ns\": [ 47083063077, 47073310487, 47059308211 ],\n \"samples_ts\": [ 10.8744, 10.8767, 10.8799 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T10:15:44Z", + "avg_ns": 9071227361, + "stddev_ns": 3650991, + "avg_ts": 56.442203, + "stddev_ts": 0.022704, + "samples_ns": [ + 9069503003, + 9075419797, + 9068759284 + ], + "samples_ts": [ + 56.4529, + 56.4161, + 56.4576 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T10:16:20Z", + "avg_ns": 47071893925, + "stddev_ns": 11940619, + "avg_ts": 10.876979, + "stddev_ts": 0.002759, + "samples_ns": [ + 47083063077, + 47073310487, + 47059308211 + ], + "samples_ts": [ + 10.8744, + 10.8767, + 10.8799 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 675 + }, + { + "timestamp_utc": "2025-12-09T10:19:27.258918+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:18:43Z\",\n \"avg_ns\": 2256561844,\n \"stddev_ns\": 236111,\n \"avg_ts\": 56.723462,\n \"stddev_ts\": 0.005935,\n \"samples_ns\": [ 2256398854, 2256832612, 2256454066 ],\n \"samples_ts\": [ 56.7276, 56.7167, 56.7262 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:18:52Z\",\n \"avg_ns\": 11675334989,\n \"stddev_ns\": 9386197,\n \"avg_ts\": 10.963288,\n \"stddev_ts\": 0.008810,\n \"samples_ns\": [ 11668838178, 11671070453, 11686096336 ],\n \"samples_ts\": [ 10.9694, 10.9673, 10.9532 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T10:18:43Z", + "avg_ns": 2256561844, + "stddev_ns": 236111, + "avg_ts": 56.723462, + "stddev_ts": 0.005935, + "samples_ns": [ + 2256398854, + 2256832612, + 2256454066 + ], + "samples_ts": [ + 56.7276, + 56.7167, + 56.7262 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T10:18:52Z", + "avg_ns": 11675334989, + "stddev_ns": 9386197, + "avg_ts": 10.963288, + "stddev_ts": 0.00881, + "samples_ns": [ + 11668838178, + 11671070453, + 11686096336 + ], + "samples_ts": [ + 10.9694, + 10.9673, + 10.9532 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 676 + }, + { + "timestamp_utc": "2025-12-09T10:21:58.654922+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:19:28Z\",\n \"avg_ns\": 2258043036,\n \"stddev_ns\": 111320,\n \"avg_ts\": 56.686254,\n \"stddev_ts\": 0.002527,\n \"samples_ns\": [ 2258037473, 2257945264, 2258146372 ],\n \"samples_ts\": [ 56.6864, 56.6887, 56.6837 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:19:37Z\",\n \"avg_ns\": 47100176719,\n \"stddev_ns\": 5070058,\n \"avg_ts\": 10.870448,\n \"stddev_ts\": 0.001168,\n \"samples_ns\": [ 47094513859, 47101759032, 47104257268 ],\n \"samples_ts\": [ 10.8718, 10.8701, 10.8695 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T10:19:28Z", + "avg_ns": 2258043036, + "stddev_ns": 111320, + "avg_ts": 56.686254, + "stddev_ts": 0.002527, + "samples_ns": [ + 2258037473, + 2257945264, + 2258146372 + ], + "samples_ts": [ + 56.6864, + 56.6887, + 56.6837 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T10:19:37Z", + "avg_ns": 47100176719, + "stddev_ns": 5070058, + "avg_ts": 10.870448, + "stddev_ts": 0.001168, + "samples_ns": [ + 47094513859, + 47101759032, + 47104257268 + ], + "samples_ts": [ + 10.8718, + 10.8701, + 10.8695 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 677 + }, + { + "timestamp_utc": "2025-12-09T10:23:11.012489+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:21:59Z\",\n \"avg_ns\": 9066048241,\n \"stddev_ns\": 909326,\n \"avg_ts\": 56.474441,\n \"stddev_ts\": 0.005665,\n \"samples_ns\": [ 9066294336, 9066809192, 9065041195 ],\n \"samples_ts\": [ 56.4729, 56.4697, 56.4807 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:22:35Z\",\n \"avg_ns\": 11671957644,\n \"stddev_ns\": 2184575,\n \"avg_ts\": 10.966455,\n \"stddev_ts\": 0.002047,\n \"samples_ns\": [ 11670285611, 11671165087, 11674422236 ],\n \"samples_ts\": [ 10.968, 10.9672, 10.9641 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T10:21:59Z", + "avg_ns": 9066048241, + "stddev_ns": 909326, + "avg_ts": 56.474441, + "stddev_ts": 0.005665, + "samples_ns": [ + 9066294336, + 9066809192, + 9065041195 + ], + "samples_ts": [ + 56.4729, + 56.4697, + 56.4807 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T10:22:35Z", + "avg_ns": 11671957644, + "stddev_ns": 2184575, + "avg_ts": 10.966455, + "stddev_ts": 0.002047, + "samples_ns": [ + 11670285611, + 11671165087, + 11674422236 + ], + "samples_ts": [ + 10.968, + 10.9672, + 10.9641 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 678 + }, + { + "timestamp_utc": "2025-12-09T10:26:09.814049+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:23:11Z\",\n \"avg_ns\": 9055595516,\n \"stddev_ns\": 3855434,\n \"avg_ts\": 56.539635,\n \"stddev_ts\": 0.024066,\n \"samples_ns\": [ 9060029748, 9053035523, 9053721277 ],\n \"samples_ts\": [ 56.512, 56.5556, 56.5513 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:23:48Z\",\n \"avg_ns\": 47155342709,\n \"stddev_ns\": 15839014,\n \"avg_ts\": 10.857731,\n \"stddev_ts\": 0.003646,\n \"samples_ns\": [ 47147510520, 47173568685, 47144948924 ],\n \"samples_ts\": [ 10.8595, 10.8535, 10.8601 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T10:23:11Z", + "avg_ns": 9055595516, + "stddev_ns": 3855434, + "avg_ts": 56.539635, + "stddev_ts": 0.024066, + "samples_ns": [ + 9060029748, + 9053035523, + 9053721277 + ], + "samples_ts": [ + 56.512, + 56.5556, + 56.5513 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T10:23:48Z", + "avg_ns": 47155342709, + "stddev_ns": 15839014, + "avg_ts": 10.857731, + "stddev_ts": 0.003646, + "samples_ns": [ + 47147510520, + 47173568685, + 47144948924 + ], + "samples_ts": [ + 10.8595, + 10.8535, + 10.8601 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 679 + }, + { + "timestamp_utc": "2025-12-09T10:26:54.968440+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:26:10Z\",\n \"avg_ns\": 2256053430,\n \"stddev_ns\": 854329,\n \"avg_ts\": 56.736250,\n \"stddev_ts\": 0.021487,\n \"samples_ns\": [ 2255134726, 2256201549, 2256824015 ],\n \"samples_ts\": [ 56.7594, 56.7325, 56.7169 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:26:19Z\",\n \"avg_ns\": 11685112811,\n \"stddev_ns\": 3368926,\n \"avg_ts\": 10.954110,\n \"stddev_ts\": 0.003156,\n \"samples_ns\": [ 11682564541, 11683843775, 11688930118 ],\n \"samples_ts\": [ 10.9565, 10.9553, 10.9505 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T10:26:10Z", + "avg_ns": 2256053430, + "stddev_ns": 854329, + "avg_ts": 56.73625, + "stddev_ts": 0.021487, + "samples_ns": [ + 2255134726, + 2256201549, + 2256824015 + ], + "samples_ts": [ + 56.7594, + 56.7325, + 56.7169 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T10:26:19Z", + "avg_ns": 11685112811, + "stddev_ns": 3368926, + "avg_ts": 10.95411, + "stddev_ts": 0.003156, + "samples_ns": [ + 11682564541, + 11683843775, + 11688930118 + ], + "samples_ts": [ + 10.9565, + 10.9553, + 10.9505 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 680 + }, + { + "timestamp_utc": "2025-12-09T10:29:26.903823+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:26:55Z\",\n \"avg_ns\": 2259404315,\n \"stddev_ns\": 864370,\n \"avg_ts\": 56.652106,\n \"stddev_ts\": 0.021671,\n \"samples_ns\": [ 2258609161, 2259279462, 2260324322 ],\n \"samples_ts\": [ 56.672, 56.6552, 56.629 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:27:04Z\",\n \"avg_ns\": 47271069062,\n \"stddev_ns\": 7471973,\n \"avg_ts\": 10.831149,\n \"stddev_ts\": 0.001712,\n \"samples_ns\": [ 47279655048, 47266040523, 47267511615 ],\n \"samples_ts\": [ 10.8292, 10.8323, 10.832 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T10:26:55Z", + "avg_ns": 2259404315, + "stddev_ns": 864370, + "avg_ts": 56.652106, + "stddev_ts": 0.021671, + "samples_ns": [ + 2258609161, + 2259279462, + 2260324322 + ], + "samples_ts": [ + 56.672, + 56.6552, + 56.629 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T10:27:04Z", + "avg_ns": 47271069062, + "stddev_ns": 7471973, + "avg_ts": 10.831149, + "stddev_ts": 0.001712, + "samples_ns": [ + 47279655048, + 47266040523, + 47267511615 + ], + "samples_ts": [ + 10.8292, + 10.8323, + 10.832 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 681 + }, + { + "timestamp_utc": "2025-12-09T10:30:40.614745+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:29:27Z\",\n \"avg_ns\": 9388826656,\n \"stddev_ns\": 2763186,\n \"avg_ts\": 54.532909,\n \"stddev_ts\": 0.016047,\n \"samples_ns\": [ 9387708540, 9386797749, 9391973679 ],\n \"samples_ts\": [ 54.5394, 54.5447, 54.5146 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:30:05Z\",\n \"avg_ns\": 11688459522,\n \"stddev_ns\": 1425193,\n \"avg_ts\": 10.950973,\n \"stddev_ts\": 0.001331,\n \"samples_ns\": [ 11689590217, 11686864304, 11688924046 ],\n \"samples_ts\": [ 10.9499, 10.9525, 10.9505 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T10:29:27Z", + "avg_ns": 9388826656, + "stddev_ns": 2763186, + "avg_ts": 54.532909, + "stddev_ts": 0.016047, + "samples_ns": [ + 9387708540, + 9386797749, + 9391973679 + ], + "samples_ts": [ + 54.5394, + 54.5447, + 54.5146 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T10:30:05Z", + "avg_ns": 11688459522, + "stddev_ns": 1425193, + "avg_ts": 10.950973, + "stddev_ts": 0.001331, + "samples_ns": [ + 11689590217, + 11686864304, + 11688924046 + ], + "samples_ts": [ + 10.9499, + 10.9525, + 10.9505 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 682 + }, + { + "timestamp_utc": "2025-12-09T10:33:40.938309+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:30:41Z\",\n \"avg_ns\": 9363922191,\n \"stddev_ns\": 2835847,\n \"avg_ts\": 54.677946,\n \"stddev_ts\": 0.016556,\n \"samples_ns\": [ 9362107788, 9367190110, 9362468675 ],\n \"samples_ts\": [ 54.6885, 54.6589, 54.6864 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:31:19Z\",\n \"avg_ns\": 47245027683,\n \"stddev_ns\": 5803251,\n \"avg_ts\": 10.837119,\n \"stddev_ts\": 0.001330,\n \"samples_ns\": [ 47249448535, 47238461510, 47247173005 ],\n \"samples_ts\": [ 10.8361, 10.8386, 10.8366 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T10:30:41Z", + "avg_ns": 9363922191, + "stddev_ns": 2835847, + "avg_ts": 54.677946, + "stddev_ts": 0.016556, + "samples_ns": [ + 9362107788, + 9367190110, + 9362468675 + ], + "samples_ts": [ + 54.6885, + 54.6589, + 54.6864 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T10:31:19Z", + "avg_ns": 47245027683, + "stddev_ns": 5803251, + "avg_ts": 10.837119, + "stddev_ts": 0.00133, + "samples_ns": [ + 47249448535, + 47238461510, + 47247173005 + ], + "samples_ts": [ + 10.8361, + 10.8386, + 10.8366 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 683 + }, + { + "timestamp_utc": "2025-12-09T10:34:27.152968+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:33:41Z\",\n \"avg_ns\": 1751620116,\n \"stddev_ns\": 254656,\n \"avg_ts\": 73.075207,\n \"stddev_ts\": 0.010480,\n \"samples_ns\": [ 1751797656, 1751332706, 1751729987 ],\n \"samples_ts\": [ 73.0678, 73.0872, 73.0706 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:33:48Z\",\n \"avg_ns\": 12708398126,\n \"stddev_ns\": 14796070,\n \"avg_ts\": 10.072089,\n \"stddev_ts\": 0.011732,\n \"samples_ns\": [ 12720314411, 12713042465, 12691837503 ],\n \"samples_ts\": [ 10.0626, 10.0684, 10.0852 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T10:33:41Z", + "avg_ns": 1751620116, + "stddev_ns": 254656, + "avg_ts": 73.075207, + "stddev_ts": 0.01048, + "samples_ns": [ + 1751797656, + 1751332706, + 1751729987 + ], + "samples_ts": [ + 73.0678, + 73.0872, + 73.0706 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T10:33:48Z", + "avg_ns": 12708398126, + "stddev_ns": 14796070, + "avg_ts": 10.072089, + "stddev_ts": 0.011732, + "samples_ns": [ + 12720314411, + 12713042465, + 12691837503 + ], + "samples_ts": [ + 10.0626, + 10.0684, + 10.0852 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 684 + }, + { + "timestamp_utc": "2025-12-09T10:37:08.904608+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:34:28Z\",\n \"avg_ns\": 1756886700,\n \"stddev_ns\": 1020893,\n \"avg_ts\": 72.856166,\n \"stddev_ts\": 0.042278,\n \"samples_ns\": [ 1755710437, 1757443027, 1757506638 ],\n \"samples_ts\": [ 72.905, 72.8331, 72.8305 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:34:35Z\",\n \"avg_ns\": 51205246048,\n \"stddev_ns\": 26755146,\n \"avg_ts\": 9.998977,\n \"stddev_ts\": 0.005223,\n \"samples_ns\": [ 51236010500, 51192292163, 51187435483 ],\n \"samples_ts\": [ 9.99297, 10.0015, 10.0025 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T10:34:28Z", + "avg_ns": 1756886700, + "stddev_ns": 1020893, + "avg_ts": 72.856166, + "stddev_ts": 0.042278, + "samples_ns": [ + 1755710437, + 1757443027, + 1757506638 + ], + "samples_ts": [ + 72.905, + 72.8331, + 72.8305 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T10:34:35Z", + "avg_ns": 51205246048, + "stddev_ns": 26755146, + "avg_ts": 9.998977, + "stddev_ts": 0.005223, + "samples_ns": [ + 51236010500, + 51192292163, + 51187435483 + ], + "samples_ts": [ + 9.99297, + 10.0015, + 10.0025 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 685 + }, + { + "timestamp_utc": "2025-12-09T10:38:17.515995+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:37:09Z\",\n \"avg_ns\": 7033433834,\n \"stddev_ns\": 2545278,\n \"avg_ts\": 72.795175,\n \"stddev_ts\": 0.026328,\n \"samples_ns\": [ 7036036209, 7033312539, 7030952755 ],\n \"samples_ts\": [ 72.7682, 72.7964, 72.8209 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:37:37Z\",\n \"avg_ns\": 13135315935,\n \"stddev_ns\": 53998134,\n \"avg_ts\": 9.744832,\n \"stddev_ts\": 0.040132,\n \"samples_ns\": [ 13074719556, 13178336456, 13152891793 ],\n \"samples_ts\": [ 9.78988, 9.71291, 9.7317 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T10:37:09Z", + "avg_ns": 7033433834, + "stddev_ns": 2545278, + "avg_ts": 72.795175, + "stddev_ts": 0.026328, + "samples_ns": [ + 7036036209, + 7033312539, + 7030952755 + ], + "samples_ts": [ + 72.7682, + 72.7964, + 72.8209 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T10:37:37Z", + "avg_ns": 13135315935, + "stddev_ns": 53998134, + "avg_ts": 9.744832, + "stddev_ts": 0.040132, + "samples_ns": [ + 13074719556, + 13178336456, + 13152891793 + ], + "samples_ts": [ + 9.78988, + 9.71291, + 9.7317 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 686 + }, + { + "timestamp_utc": "2025-12-09T10:41:24.533948+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:38:18Z\",\n \"avg_ns\": 7030323957,\n \"stddev_ns\": 2337061,\n \"avg_ts\": 72.827375,\n \"stddev_ts\": 0.024213,\n \"samples_ns\": [ 7027732307, 7030968345, 7032271219 ],\n \"samples_ts\": [ 72.8542, 72.8207, 72.8072 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:38:46Z\",\n \"avg_ns\": 52602123964,\n \"stddev_ns\": 3199914841,\n \"avg_ts\": 9.735857,\n \"stddev_ts\": 0.188650,\n \"samples_ns\": [ 53206241666, 53161670710, 51438459518 ],\n \"samples_ts\": [ 9.62293, 9.631, 9.95364 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T10:38:18Z", + "avg_ns": 7030323957, + "stddev_ns": 2337061, + "avg_ts": 72.827375, + "stddev_ts": 0.024213, + "samples_ns": [ + 7027732307, + 7030968345, + 7032271219 + ], + "samples_ts": [ + 72.8542, + 72.8207, + 72.8072 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T10:38:46Z", + "avg_ns": 52602123964, + "stddev_ns": 3199914841, + "avg_ts": 9.735857, + "stddev_ts": 0.18865, + "samples_ns": [ + 53206241666, + 53161670710, + 51438459518 + ], + "samples_ts": [ + 9.62293, + 9.631, + 9.95364 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 687 + }, + { + "timestamp_utc": "2025-12-09T10:42:10.924680+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:41:25Z\",\n \"avg_ns\": 1754847989,\n \"stddev_ns\": 4827319,\n \"avg_ts\": 72.941158,\n \"stddev_ts\": 0.200324,\n \"samples_ns\": [ 1752113279, 1752009126, 1760421563 ],\n \"samples_ts\": [ 73.0546, 73.059, 72.7099 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:41:32Z\",\n \"avg_ns\": 12760567026,\n \"stddev_ns\": 43714396,\n \"avg_ts\": 10.030981,\n \"stddev_ts\": 0.034316,\n \"samples_ns\": [ 12724849839, 12747536341, 12809314899 ],\n \"samples_ts\": [ 10.0591, 10.0412, 9.99273 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T10:41:25Z", + "avg_ns": 1754847989, + "stddev_ns": 4827319, + "avg_ts": 72.941158, + "stddev_ts": 0.200324, + "samples_ns": [ + 1752113279, + 1752009126, + 1760421563 + ], + "samples_ts": [ + 73.0546, + 73.059, + 72.7099 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T10:41:32Z", + "avg_ns": 12760567026, + "stddev_ns": 43714396, + "avg_ts": 10.030981, + "stddev_ts": 0.034316, + "samples_ns": [ + 12724849839, + 12747536341, + 12809314899 + ], + "samples_ts": [ + 10.0591, + 10.0412, + 9.99273 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 688 + }, + { + "timestamp_utc": "2025-12-09T10:44:52.791783+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:42:11Z\",\n \"avg_ns\": 1748579899,\n \"stddev_ns\": 909356,\n \"avg_ts\": 73.202273,\n \"stddev_ts\": 0.037980,\n \"samples_ns\": [ 1749597264, 1748288379, 1747854056 ],\n \"samples_ts\": [ 73.1597, 73.2145, 73.2327 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:42:18Z\",\n \"avg_ns\": 51255599698,\n \"stddev_ns\": 190235125,\n \"avg_ts\": 9.989244,\n \"stddev_ts\": 0.036996,\n \"samples_ns\": [ 51475092160, 51138327091, 51153379844 ],\n \"samples_ts\": [ 9.94656, 10.0121, 10.0091 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T10:42:11Z", + "avg_ns": 1748579899, + "stddev_ns": 909356, + "avg_ts": 73.202273, + "stddev_ts": 0.03798, + "samples_ns": [ + 1749597264, + 1748288379, + 1747854056 + ], + "samples_ts": [ + 73.1597, + 73.2145, + 73.2327 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T10:42:18Z", + "avg_ns": 51255599698, + "stddev_ns": 190235125, + "avg_ts": 9.989244, + "stddev_ts": 0.036996, + "samples_ns": [ + 51475092160, + 51138327091, + 51153379844 + ], + "samples_ts": [ + 9.94656, + 10.0121, + 10.0091 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 689 + }, + { + "timestamp_utc": "2025-12-09T10:46:01.710341+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:44:53Z\",\n \"avg_ns\": 7038567718,\n \"stddev_ns\": 9966762,\n \"avg_ts\": 72.742169,\n \"stddev_ts\": 0.102957,\n \"samples_ns\": [ 7036273328, 7029948225, 7049481601 ],\n \"samples_ts\": [ 72.7658, 72.8313, 72.6295 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:45:21Z\",\n \"avg_ns\": 13214337938,\n \"stddev_ns\": 49247073,\n \"avg_ts\": 9.686538,\n \"stddev_ts\": 0.036164,\n \"samples_ns\": [ 13158613494, 13252016001, 13232384320 ],\n \"samples_ts\": [ 9.72747, 9.65891, 9.67324 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T10:44:53Z", + "avg_ns": 7038567718, + "stddev_ns": 9966762, + "avg_ts": 72.742169, + "stddev_ts": 0.102957, + "samples_ns": [ + 7036273328, + 7029948225, + 7049481601 + ], + "samples_ts": [ + 72.7658, + 72.8313, + 72.6295 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T10:45:21Z", + "avg_ns": 13214337938, + "stddev_ns": 49247073, + "avg_ts": 9.686538, + "stddev_ts": 0.036164, + "samples_ns": [ + 13158613494, + 13252016001, + 13232384320 + ], + "samples_ts": [ + 9.72747, + 9.65891, + 9.67324 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 690 + }, + { + "timestamp_utc": "2025-12-09T10:49:10.287652+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:46:02Z\",\n \"avg_ns\": 7039636044,\n \"stddev_ns\": 6097203,\n \"avg_ts\": 72.731069,\n \"stddev_ts\": 0.062996,\n \"samples_ns\": [ 7040205630, 7045427890, 7033274613 ],\n \"samples_ts\": [ 72.7251, 72.6712, 72.7968 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:46:30Z\",\n \"avg_ns\": 53104626552,\n \"stddev_ns\": 25736823,\n \"avg_ts\": 9.641346,\n \"stddev_ts\": 0.004674,\n \"samples_ns\": [ 53074913138, 53119818818, 53119147702 ],\n \"samples_ts\": [ 9.64674, 9.63859, 9.63871 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T10:46:02Z", + "avg_ns": 7039636044, + "stddev_ns": 6097203, + "avg_ts": 72.731069, + "stddev_ts": 0.062996, + "samples_ns": [ + 7040205630, + 7045427890, + 7033274613 + ], + "samples_ts": [ + 72.7251, + 72.6712, + 72.7968 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T10:46:30Z", + "avg_ns": 53104626552, + "stddev_ns": 25736823, + "avg_ts": 9.641346, + "stddev_ts": 0.004674, + "samples_ns": [ + 53074913138, + 53119818818, + 53119147702 + ], + "samples_ts": [ + 9.64674, + 9.63859, + 9.63871 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 691 + }, + { + "timestamp_utc": "2025-12-09T10:49:57.734058+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:49:11Z\",\n \"avg_ns\": 1752490193,\n \"stddev_ns\": 281712,\n \"avg_ts\": 73.038926,\n \"stddev_ts\": 0.011480,\n \"samples_ns\": [ 1752176803, 1752693757, 1752600021 ],\n \"samples_ts\": [ 73.052, 73.0304, 73.0343 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:49:18Z\",\n \"avg_ns\": 13108761158,\n \"stddev_ns\": 125679573,\n \"avg_ts\": 9.765064,\n \"stddev_ts\": 0.094141,\n \"samples_ns\": [ 12963698950, 13177680373, 13184904153 ],\n \"samples_ts\": [ 9.87373, 9.71339, 9.70807 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T10:49:11Z", + "avg_ns": 1752490193, + "stddev_ns": 281712, + "avg_ts": 73.038926, + "stddev_ts": 0.01148, + "samples_ns": [ + 1752176803, + 1752693757, + 1752600021 + ], + "samples_ts": [ + 73.052, + 73.0304, + 73.0343 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T10:49:18Z", + "avg_ns": 13108761158, + "stddev_ns": 125679573, + "avg_ts": 9.765064, + "stddev_ts": 0.094141, + "samples_ns": [ + 12963698950, + 13177680373, + 13184904153 + ], + "samples_ts": [ + 9.87373, + 9.71339, + 9.70807 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 692 + }, + { + "timestamp_utc": "2025-12-09T10:52:44.853034+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:49:58Z\",\n \"avg_ns\": 1754851739,\n \"stddev_ns\": 1680807,\n \"avg_ts\": 72.940679,\n \"stddev_ts\": 0.069832,\n \"samples_ns\": [ 1753554810, 1754249765, 1756750642 ],\n \"samples_ts\": [ 72.9946, 72.9657, 72.8618 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:50:05Z\",\n \"avg_ns\": 52994504537,\n \"stddev_ns\": 244506680,\n \"avg_ts\": 9.661516,\n \"stddev_ts\": 0.044579,\n \"samples_ns\": [ 52996034687, 53238242552, 52749236372 ],\n \"samples_ts\": [ 9.6611, 9.61715, 9.7063 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T10:49:58Z", + "avg_ns": 1754851739, + "stddev_ns": 1680807, + "avg_ts": 72.940679, + "stddev_ts": 0.069832, + "samples_ns": [ + 1753554810, + 1754249765, + 1756750642 + ], + "samples_ts": [ + 72.9946, + 72.9657, + 72.8618 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T10:50:05Z", + "avg_ns": 52994504537, + "stddev_ns": 244506680, + "avg_ts": 9.661516, + "stddev_ts": 0.044579, + "samples_ns": [ + 52996034687, + 53238242552, + 52749236372 + ], + "samples_ts": [ + 9.6611, + 9.61715, + 9.7063 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 693 + }, + { + "timestamp_utc": "2025-12-09T10:53:54.933837+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:52:45Z\",\n \"avg_ns\": 7334849430,\n \"stddev_ns\": 6547289,\n \"avg_ts\": 69.803788,\n \"stddev_ts\": 0.062341,\n \"samples_ns\": [ 7338599410, 7338659529, 7327289351 ],\n \"samples_ts\": [ 69.7681, 69.7675, 69.8758 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:53:15Z\",\n \"avg_ns\": 13216971336,\n \"stddev_ns\": 10766329,\n \"avg_ts\": 9.684523,\n \"stddev_ts\": 0.007888,\n \"samples_ns\": [ 13206041150, 13227563386, 13217309474 ],\n \"samples_ts\": [ 9.69253, 9.67676, 9.68427 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T10:52:45Z", + "avg_ns": 7334849430, + "stddev_ns": 6547289, + "avg_ts": 69.803788, + "stddev_ts": 0.062341, + "samples_ns": [ + 7338599410, + 7338659529, + 7327289351 + ], + "samples_ts": [ + 69.7681, + 69.7675, + 69.8758 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T10:53:15Z", + "avg_ns": 13216971336, + "stddev_ns": 10766329, + "avg_ts": 9.684523, + "stddev_ts": 0.007888, + "samples_ns": [ + 13206041150, + 13227563386, + 13217309474 + ], + "samples_ts": [ + 9.69253, + 9.67676, + 9.68427 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 694 + }, + { + "timestamp_utc": "2025-12-09T10:57:04.951340+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:53:55Z\",\n \"avg_ns\": 7342258968,\n \"stddev_ns\": 1857917,\n \"avg_ts\": 69.733310,\n \"stddev_ts\": 0.017624,\n \"samples_ns\": [ 7341209554, 7341165478, 7344401873 ],\n \"samples_ts\": [ 69.7433, 69.7437, 69.713 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:54:25Z\",\n \"avg_ns\": 53183314247,\n \"stddev_ns\": 93013700,\n \"avg_ts\": 9.627099,\n \"stddev_ts\": 0.016854,\n \"samples_ns\": [ 53076177158, 53243415415, 53230350170 ],\n \"samples_ts\": [ 9.64651, 9.61621, 9.61857 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T10:53:55Z", + "avg_ns": 7342258968, + "stddev_ns": 1857917, + "avg_ts": 69.73331, + "stddev_ts": 0.017624, + "samples_ns": [ + 7341209554, + 7341165478, + 7344401873 + ], + "samples_ts": [ + 69.7433, + 69.7437, + 69.713 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T10:54:25Z", + "avg_ns": 53183314247, + "stddev_ns": 93013700, + "avg_ts": 9.627099, + "stddev_ts": 0.016854, + "samples_ns": [ + 53076177158, + 53243415415, + 53230350170 + ], + "samples_ts": [ + 9.64651, + 9.61621, + 9.61857 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 695 + }, + { + "timestamp_utc": "2025-12-09T10:57:52.499589+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:57:05Z\",\n \"avg_ns\": 1749937281,\n \"stddev_ns\": 877845,\n \"avg_ts\": 73.145491,\n \"stddev_ts\": 0.036620,\n \"samples_ns\": [ 1750337758, 1750541299, 1748932788 ],\n \"samples_ts\": [ 73.1287, 73.1202, 73.1875 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:57:12Z\",\n \"avg_ns\": 13149563702,\n \"stddev_ns\": 152752649,\n \"avg_ts\": 9.735045,\n \"stddev_ts\": 0.113841,\n \"samples_ns\": [ 12973451599, 13229148189, 13246091318 ],\n \"samples_ts\": [ 9.8663, 9.6756, 9.66323 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T10:57:05Z", + "avg_ns": 1749937281, + "stddev_ns": 877845, + "avg_ts": 73.145491, + "stddev_ts": 0.03662, + "samples_ns": [ + 1750337758, + 1750541299, + 1748932788 + ], + "samples_ts": [ + 73.1287, + 73.1202, + 73.1875 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T10:57:12Z", + "avg_ns": 13149563702, + "stddev_ns": 152752649, + "avg_ts": 9.735045, + "stddev_ts": 0.113841, + "samples_ns": [ + 12973451599, + 13229148189, + 13246091318 + ], + "samples_ts": [ + 9.8663, + 9.6756, + 9.66323 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 696 + }, + { + "timestamp_utc": "2025-12-09T11:00:39.693715+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:57:53Z\",\n \"avg_ns\": 1750226786,\n \"stddev_ns\": 1746915,\n \"avg_ts\": 73.133428,\n \"stddev_ts\": 0.072983,\n \"samples_ns\": [ 1752066626, 1750023075, 1748590657 ],\n \"samples_ts\": [ 73.0566, 73.1419, 73.2018 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T10:58:00Z\",\n \"avg_ns\": 53035913944,\n \"stddev_ns\": 223638116,\n \"avg_ts\": 9.653950,\n \"stddev_ts\": 0.040706,\n \"samples_ns\": [ 53033648931, 53260675728, 52813417175 ],\n \"samples_ts\": [ 9.65425, 9.6131, 9.69451 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T10:57:53Z", + "avg_ns": 1750226786, + "stddev_ns": 1746915, + "avg_ts": 73.133428, + "stddev_ts": 0.072983, + "samples_ns": [ + 1752066626, + 1750023075, + 1748590657 + ], + "samples_ts": [ + 73.0566, + 73.1419, + 73.2018 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T10:58:00Z", + "avg_ns": 53035913944, + "stddev_ns": 223638116, + "avg_ts": 9.65395, + "stddev_ts": 0.040706, + "samples_ns": [ + 53033648931, + 53260675728, + 52813417175 + ], + "samples_ts": [ + 9.65425, + 9.6131, + 9.69451 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 697 + }, + { + "timestamp_utc": "2025-12-09T11:01:48.395864+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:00:40Z\",\n \"avg_ns\": 7019470427,\n \"stddev_ns\": 1284640,\n \"avg_ts\": 72.939977,\n \"stddev_ts\": 0.013349,\n \"samples_ns\": [ 7020756532, 7018187256, 7019467493 ],\n \"samples_ts\": [ 72.9266, 72.9533, 72.94 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:01:08Z\",\n \"avg_ns\": 13180507405,\n \"stddev_ns\": 62837116,\n \"avg_ts\": 9.711458,\n \"stddev_ts\": 0.046420,\n \"samples_ns\": [ 13108351234, 13223188916, 13209982066 ],\n \"samples_ts\": [ 9.76477, 9.67996, 9.68964 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T11:00:40Z", + "avg_ns": 7019470427, + "stddev_ns": 1284640, + "avg_ts": 72.939977, + "stddev_ts": 0.013349, + "samples_ns": [ + 7020756532, + 7018187256, + 7019467493 + ], + "samples_ts": [ + 72.9266, + 72.9533, + 72.94 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T11:01:08Z", + "avg_ns": 13180507405, + "stddev_ns": 62837116, + "avg_ts": 9.711458, + "stddev_ts": 0.04642, + "samples_ns": [ + 13108351234, + 13223188916, + 13209982066 + ], + "samples_ts": [ + 9.76477, + 9.67996, + 9.68964 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 698 + }, + { + "timestamp_utc": "2025-12-09T11:04:56.124407+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:01:49Z\",\n \"avg_ns\": 7016584117,\n \"stddev_ns\": 5701732,\n \"avg_ts\": 72.970012,\n \"stddev_ts\": 0.059258,\n \"samples_ns\": [ 7023088449, 7014207153, 7012456751 ],\n \"samples_ts\": [ 72.9024, 72.9947, 73.0129 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:02:17Z\",\n \"avg_ns\": 52854586098,\n \"stddev_ns\": 458170723,\n \"avg_ts\": 9.687443,\n \"stddev_ts\": 0.084377,\n \"samples_ns\": [ 53166178305, 53069064328, 52328515662 ],\n \"samples_ts\": [ 9.63018, 9.64781, 9.78434 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T11:01:49Z", + "avg_ns": 7016584117, + "stddev_ns": 5701732, + "avg_ts": 72.970012, + "stddev_ts": 0.059258, + "samples_ns": [ + 7023088449, + 7014207153, + 7012456751 + ], + "samples_ts": [ + 72.9024, + 72.9947, + 73.0129 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T11:02:17Z", + "avg_ns": 52854586098, + "stddev_ns": 458170723, + "avg_ts": 9.687443, + "stddev_ts": 0.084377, + "samples_ns": [ + 53166178305, + 53069064328, + 52328515662 + ], + "samples_ts": [ + 9.63018, + 9.64781, + 9.78434 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 699 + }, + { + "timestamp_utc": "2025-12-09T11:05:42.333711+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:04:57Z\",\n \"avg_ns\": 1758325919,\n \"stddev_ns\": 1284162,\n \"avg_ts\": 72.796541,\n \"stddev_ts\": 0.053087,\n \"samples_ns\": [ 1757573616, 1759807099, 1757597044 ],\n \"samples_ts\": [ 72.8277, 72.7352, 72.8267 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:05:04Z\",\n \"avg_ns\": 12683269970,\n \"stddev_ns\": 26818526,\n \"avg_ts\": 10.092065,\n \"stddev_ts\": 0.021365,\n \"samples_ns\": [ 12699470445, 12698025071, 12652314396 ],\n \"samples_ts\": [ 10.0792, 10.0803, 10.1167 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T11:04:57Z", + "avg_ns": 1758325919, + "stddev_ns": 1284162, + "avg_ts": 72.796541, + "stddev_ts": 0.053087, + "samples_ns": [ + 1757573616, + 1759807099, + 1757597044 + ], + "samples_ts": [ + 72.8277, + 72.7352, + 72.8267 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T11:05:04Z", + "avg_ns": 12683269970, + "stddev_ns": 26818526, + "avg_ts": 10.092065, + "stddev_ts": 0.021365, + "samples_ns": [ + 12699470445, + 12698025071, + 12652314396 + ], + "samples_ts": [ + 10.0792, + 10.0803, + 10.1167 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 700 + }, + { + "timestamp_utc": "2025-12-09T11:08:24.232798+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:05:43Z\",\n \"avg_ns\": 1752107690,\n \"stddev_ns\": 1228524,\n \"avg_ts\": 73.054894,\n \"stddev_ts\": 0.051185,\n \"samples_ns\": [ 1750694709, 1752722634, 1752905729 ],\n \"samples_ts\": [ 73.1138, 73.0292, 73.0216 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:05:50Z\",\n \"avg_ns\": 51269782210,\n \"stddev_ns\": 11826268,\n \"avg_ts\": 9.986390,\n \"stddev_ts\": 0.002304,\n \"samples_ns\": [ 51276200011, 51277012155, 51256134464 ],\n \"samples_ts\": [ 9.98514, 9.98498, 9.98905 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T11:05:43Z", + "avg_ns": 1752107690, + "stddev_ns": 1228524, + "avg_ts": 73.054894, + "stddev_ts": 0.051185, + "samples_ns": [ + 1750694709, + 1752722634, + 1752905729 + ], + "samples_ts": [ + 73.1138, + 73.0292, + 73.0216 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T11:05:50Z", + "avg_ns": 51269782210, + "stddev_ns": 11826268, + "avg_ts": 9.98639, + "stddev_ts": 0.002304, + "samples_ns": [ + 51276200011, + 51277012155, + 51256134464 + ], + "samples_ts": [ + 9.98514, + 9.98498, + 9.98905 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 701 + }, + { + "timestamp_utc": "2025-12-09T11:09:33.080535+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:08:25Z\",\n \"avg_ns\": 7034377102,\n \"stddev_ns\": 3431274,\n \"avg_ts\": 72.785418,\n \"stddev_ts\": 0.035496,\n \"samples_ns\": [ 7038236848, 7033222092, 7031672366 ],\n \"samples_ts\": [ 72.7455, 72.7974, 72.8134 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:08:53Z\",\n \"avg_ns\": 13207981928,\n \"stddev_ns\": 57774009,\n \"avg_ts\": 9.691233,\n \"stddev_ts\": 0.042486,\n \"samples_ns\": [ 13142110506, 13231777630, 13250057649 ],\n \"samples_ts\": [ 9.73968, 9.67368, 9.66034 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T11:08:25Z", + "avg_ns": 7034377102, + "stddev_ns": 3431274, + "avg_ts": 72.785418, + "stddev_ts": 0.035496, + "samples_ns": [ + 7038236848, + 7033222092, + 7031672366 + ], + "samples_ts": [ + 72.7455, + 72.7974, + 72.8134 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T11:08:53Z", + "avg_ns": 13207981928, + "stddev_ns": 57774009, + "avg_ts": 9.691233, + "stddev_ts": 0.042486, + "samples_ns": [ + 13142110506, + 13231777630, + 13250057649 + ], + "samples_ts": [ + 9.73968, + 9.67368, + 9.66034 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 702 + }, + { + "timestamp_utc": "2025-12-09T11:12:40.785952+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:09:33Z\",\n \"avg_ns\": 7032617048,\n \"stddev_ns\": 1727925,\n \"avg_ts\": 72.803626,\n \"stddev_ts\": 0.017844,\n \"samples_ns\": [ 7031326023, 7031950463, 7034574660 ],\n \"samples_ts\": [ 72.817, 72.8105, 72.7834 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:10:02Z\",\n \"avg_ns\": 52821912804,\n \"stddev_ns\": 961701758,\n \"avg_ts\": 9.695112,\n \"stddev_ts\": 0.178380,\n \"samples_ns\": [ 53344729137, 53408954744, 51712054531 ],\n \"samples_ts\": [ 9.59795, 9.58641, 9.90098 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T11:09:33Z", + "avg_ns": 7032617048, + "stddev_ns": 1727925, + "avg_ts": 72.803626, + "stddev_ts": 0.017844, + "samples_ns": [ + 7031326023, + 7031950463, + 7034574660 + ], + "samples_ts": [ + 72.817, + 72.8105, + 72.7834 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T11:10:02Z", + "avg_ns": 52821912804, + "stddev_ns": 961701758, + "avg_ts": 9.695112, + "stddev_ts": 0.17838, + "samples_ns": [ + 53344729137, + 53408954744, + 51712054531 + ], + "samples_ts": [ + 9.59795, + 9.58641, + 9.90098 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 703 + }, + { + "timestamp_utc": "2025-12-09T11:13:26.952097+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:12:41Z\",\n \"avg_ns\": 1752174177,\n \"stddev_ns\": 1656717,\n \"avg_ts\": 73.052142,\n \"stddev_ts\": 0.069107,\n \"samples_ns\": [ 1752887858, 1750280226, 1753354447 ],\n \"samples_ts\": [ 73.0224, 73.1311, 73.0029 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:12:48Z\",\n \"avg_ns\": 12690966157,\n \"stddev_ns\": 31332573,\n \"avg_ts\": 10.085955,\n \"stddev_ts\": 0.024865,\n \"samples_ns\": [ 12727140645, 12672369354, 12673388474 ],\n \"samples_ts\": [ 10.0572, 10.1007, 10.0999 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T11:12:41Z", + "avg_ns": 1752174177, + "stddev_ns": 1656717, + "avg_ts": 73.052142, + "stddev_ts": 0.069107, + "samples_ns": [ + 1752887858, + 1750280226, + 1753354447 + ], + "samples_ts": [ + 73.0224, + 73.1311, + 73.0029 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T11:12:48Z", + "avg_ns": 12690966157, + "stddev_ns": 31332573, + "avg_ts": 10.085955, + "stddev_ts": 0.024865, + "samples_ns": [ + 12727140645, + 12672369354, + 12673388474 + ], + "samples_ts": [ + 10.0572, + 10.1007, + 10.0999 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 704 + }, + { + "timestamp_utc": "2025-12-09T11:16:10.615351+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:13:27Z\",\n \"avg_ns\": 1747614538,\n \"stddev_ns\": 733987,\n \"avg_ts\": 73.242704,\n \"stddev_ts\": 0.030654,\n \"samples_ns\": [ 1747128880, 1747258750, 1748455986 ],\n \"samples_ts\": [ 73.2631, 73.2576, 73.2074 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:13:34Z\",\n \"avg_ns\": 51856231091,\n \"stddev_ns\": 599969046,\n \"avg_ts\": 9.874328,\n \"stddev_ts\": 0.113559,\n \"samples_ns\": [ 51600708589, 52541660696, 51426323989 ],\n \"samples_ts\": [ 9.92234, 9.74465, 9.95599 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T11:13:27Z", + "avg_ns": 1747614538, + "stddev_ns": 733987, + "avg_ts": 73.242704, + "stddev_ts": 0.030654, + "samples_ns": [ + 1747128880, + 1747258750, + 1748455986 + ], + "samples_ts": [ + 73.2631, + 73.2576, + 73.2074 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T11:13:34Z", + "avg_ns": 51856231091, + "stddev_ns": 599969046, + "avg_ts": 9.874328, + "stddev_ts": 0.113559, + "samples_ns": [ + 51600708589, + 52541660696, + 51426323989 + ], + "samples_ts": [ + 9.92234, + 9.74465, + 9.95599 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 705 + }, + { + "timestamp_utc": "2025-12-09T11:17:20.841509+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:16:11Z\",\n \"avg_ns\": 7332662880,\n \"stddev_ns\": 2510126,\n \"avg_ts\": 69.824571,\n \"stddev_ts\": 0.023879,\n \"samples_ns\": [ 7329781311, 7333861826, 7334345505 ],\n \"samples_ts\": [ 69.852, 69.8132, 69.8085 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:16:40Z\",\n \"avg_ns\": 13265898169,\n \"stddev_ns\": 16672099,\n \"avg_ts\": 9.648810,\n \"stddev_ts\": 0.012133,\n \"samples_ns\": [ 13247236873, 13279324660, 13271132974 ],\n \"samples_ts\": [ 9.66239, 9.63904, 9.64499 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T11:16:11Z", + "avg_ns": 7332662880, + "stddev_ns": 2510126, + "avg_ts": 69.824571, + "stddev_ts": 0.023879, + "samples_ns": [ + 7329781311, + 7333861826, + 7334345505 + ], + "samples_ts": [ + 69.852, + 69.8132, + 69.8085 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T11:16:40Z", + "avg_ns": 13265898169, + "stddev_ns": 16672099, + "avg_ts": 9.64881, + "stddev_ts": 0.012133, + "samples_ns": [ + 13247236873, + 13279324660, + 13271132974 + ], + "samples_ts": [ + 9.66239, + 9.63904, + 9.64499 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 706 + }, + { + "timestamp_utc": "2025-12-09T11:20:30.711688+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:17:21Z\",\n \"avg_ns\": 7333705304,\n \"stddev_ns\": 4249077,\n \"avg_ts\": 69.814656,\n \"stddev_ts\": 0.040433,\n \"samples_ns\": [ 7338412766, 7330156929, 7332546218 ],\n \"samples_ts\": [ 69.7699, 69.8484, 69.8257 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:17:51Z\",\n \"avg_ns\": 53136037074,\n \"stddev_ns\": 87931925,\n \"avg_ts\": 9.635663,\n \"stddev_ts\": 0.015942,\n \"samples_ns\": [ 53128420418, 53227529571, 53052161233 ],\n \"samples_ts\": [ 9.63703, 9.61908, 9.65088 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T11:17:21Z", + "avg_ns": 7333705304, + "stddev_ns": 4249077, + "avg_ts": 69.814656, + "stddev_ts": 0.040433, + "samples_ns": [ + 7338412766, + 7330156929, + 7332546218 + ], + "samples_ts": [ + 69.7699, + 69.8484, + 69.8257 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T11:17:51Z", + "avg_ns": 53136037074, + "stddev_ns": 87931925, + "avg_ts": 9.635663, + "stddev_ts": 0.015942, + "samples_ns": [ + 53128420418, + 53227529571, + 53052161233 + ], + "samples_ts": [ + 9.63703, + 9.61908, + 9.65088 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 707 + }, + { + "timestamp_utc": "2025-12-09T11:21:18.014710+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:20:31Z\",\n \"avg_ns\": 1759353249,\n \"stddev_ns\": 1541222,\n \"avg_ts\": 72.754045,\n \"stddev_ts\": 0.063687,\n \"samples_ns\": [ 1758869976, 1761077601, 1758112171 ],\n \"samples_ts\": [ 72.774, 72.6828, 72.8054 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:20:38Z\",\n \"avg_ns\": 13057839779,\n \"stddev_ns\": 162042293,\n \"avg_ts\": 9.803554,\n \"stddev_ts\": 0.122509,\n \"samples_ns\": [ 12871374565, 13164516152, 13137628621 ],\n \"samples_ts\": [ 9.94455, 9.72311, 9.74301 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T11:20:31Z", + "avg_ns": 1759353249, + "stddev_ns": 1541222, + "avg_ts": 72.754045, + "stddev_ts": 0.063687, + "samples_ns": [ + 1758869976, + 1761077601, + 1758112171 + ], + "samples_ts": [ + 72.774, + 72.6828, + 72.8054 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T11:20:38Z", + "avg_ns": 13057839779, + "stddev_ns": 162042293, + "avg_ts": 9.803554, + "stddev_ts": 0.122509, + "samples_ns": [ + 12871374565, + 13164516152, + 13137628621 + ], + "samples_ts": [ + 9.94455, + 9.72311, + 9.74301 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 708 + }, + { + "timestamp_utc": "2025-12-09T11:24:02.838475+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:21:18Z\",\n \"avg_ns\": 1761057826,\n \"stddev_ns\": 3141067,\n \"avg_ts\": 72.683741,\n \"stddev_ts\": 0.129608,\n \"samples_ns\": [ 1758075382, 1760761570, 1764336526 ],\n \"samples_ts\": [ 72.8069, 72.6958, 72.5485 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:21:25Z\",\n \"avg_ns\": 52230620541,\n \"stddev_ns\": 819613570,\n \"avg_ts\": 9.804296,\n \"stddev_ts\": 0.154614,\n \"samples_ns\": [ 52947253321, 52407649318, 51336958985 ],\n \"samples_ts\": [ 9.67, 9.76957, 9.97332 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T11:21:18Z", + "avg_ns": 1761057826, + "stddev_ns": 3141067, + "avg_ts": 72.683741, + "stddev_ts": 0.129608, + "samples_ns": [ + 1758075382, + 1760761570, + 1764336526 + ], + "samples_ts": [ + 72.8069, + 72.6958, + 72.5485 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T11:21:25Z", + "avg_ns": 52230620541, + "stddev_ns": 819613570, + "avg_ts": 9.804296, + "stddev_ts": 0.154614, + "samples_ns": [ + 52947253321, + 52407649318, + 51336958985 + ], + "samples_ts": [ + 9.67, + 9.76957, + 9.97332 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 709 + }, + { + "timestamp_utc": "2025-12-09T11:25:11.426146+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:24:03Z\",\n \"avg_ns\": 7032494637,\n \"stddev_ns\": 2520213,\n \"avg_ts\": 72.804896,\n \"stddev_ts\": 0.026057,\n \"samples_ns\": [ 7030856659, 7035393334, 7031233920 ],\n \"samples_ts\": [ 72.8219, 72.7749, 72.8179 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:24:31Z\",\n \"avg_ns\": 13121681900,\n \"stddev_ns\": 82247686,\n \"avg_ts\": 9.755103,\n \"stddev_ts\": 0.061367,\n \"samples_ns\": [ 13026737340, 13171104436, 13167203926 ],\n \"samples_ts\": [ 9.82594, 9.71824, 9.72112 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T11:24:03Z", + "avg_ns": 7032494637, + "stddev_ns": 2520213, + "avg_ts": 72.804896, + "stddev_ts": 0.026057, + "samples_ns": [ + 7030856659, + 7035393334, + 7031233920 + ], + "samples_ts": [ + 72.8219, + 72.7749, + 72.8179 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T11:24:31Z", + "avg_ns": 13121681900, + "stddev_ns": 82247686, + "avg_ts": 9.755103, + "stddev_ts": 0.061367, + "samples_ns": [ + 13026737340, + 13171104436, + 13167203926 + ], + "samples_ts": [ + 9.82594, + 9.71824, + 9.72112 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 710 + }, + { + "timestamp_utc": "2025-12-09T11:28:20.285373+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:25:12Z\",\n \"avg_ns\": 7029995595,\n \"stddev_ns\": 2762532,\n \"avg_ts\": 72.830779,\n \"stddev_ts\": 0.028613,\n \"samples_ns\": [ 7033181617, 7028266447, 7028538721 ],\n \"samples_ts\": [ 72.7978, 72.8487, 72.8459 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:25:40Z\",\n \"avg_ns\": 53215002274,\n \"stddev_ns\": 68948605,\n \"avg_ts\": 9.621358,\n \"stddev_ts\": 0.012475,\n \"samples_ns\": [ 53135803391, 53247569191, 53261634242 ],\n \"samples_ts\": [ 9.63569, 9.61546, 9.61292 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T11:25:12Z", + "avg_ns": 7029995595, + "stddev_ns": 2762532, + "avg_ts": 72.830779, + "stddev_ts": 0.028613, + "samples_ns": [ + 7033181617, + 7028266447, + 7028538721 + ], + "samples_ts": [ + 72.7978, + 72.8487, + 72.8459 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T11:25:40Z", + "avg_ns": 53215002274, + "stddev_ns": 68948605, + "avg_ts": 9.621358, + "stddev_ts": 0.012475, + "samples_ns": [ + 53135803391, + 53247569191, + 53261634242 + ], + "samples_ts": [ + 9.63569, + 9.61546, + 9.61292 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 711 + }, + { + "timestamp_utc": "2025-12-09T11:29:07.817672+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:28:21Z\",\n \"avg_ns\": 1753749821,\n \"stddev_ns\": 372872,\n \"avg_ts\": 72.986467,\n \"stddev_ts\": 0.015518,\n \"samples_ns\": [ 1753772911, 1753365940, 1754110612 ],\n \"samples_ts\": [ 72.9855, 73.0024, 72.9715 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:28:28Z\",\n \"avg_ns\": 13142420216,\n \"stddev_ns\": 153019418,\n \"avg_ts\": 9.740340,\n \"stddev_ts\": 0.114176,\n \"samples_ns\": [ 12965733681, 13229615783, 13231911186 ],\n \"samples_ts\": [ 9.87218, 9.67526, 9.67358 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T11:28:21Z", + "avg_ns": 1753749821, + "stddev_ns": 372872, + "avg_ts": 72.986467, + "stddev_ts": 0.015518, + "samples_ns": [ + 1753772911, + 1753365940, + 1754110612 + ], + "samples_ts": [ + 72.9855, + 73.0024, + 72.9715 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T11:28:28Z", + "avg_ns": 13142420216, + "stddev_ns": 153019418, + "avg_ts": 9.74034, + "stddev_ts": 0.114176, + "samples_ns": [ + 12965733681, + 13229615783, + 13231911186 + ], + "samples_ts": [ + 9.87218, + 9.67526, + 9.67358 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 712 + }, + { + "timestamp_utc": "2025-12-09T11:31:52.119323+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:29:08Z\",\n \"avg_ns\": 1756596793,\n \"stddev_ns\": 1263263,\n \"avg_ts\": 72.868199,\n \"stddev_ts\": 0.052349,\n \"samples_ns\": [ 1757823636, 1755302590, 1756664155 ],\n \"samples_ts\": [ 72.8173, 72.9219, 72.8654 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:29:15Z\",\n \"avg_ns\": 52059580331,\n \"stddev_ns\": 822595033,\n \"avg_ts\": 9.836527,\n \"stddev_ts\": 0.155870,\n \"samples_ns\": [ 52829965096, 52155577124, 51193198774 ],\n \"samples_ts\": [ 9.69147, 9.81678, 10.0013 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T11:29:08Z", + "avg_ns": 1756596793, + "stddev_ns": 1263263, + "avg_ts": 72.868199, + "stddev_ts": 0.052349, + "samples_ns": [ + 1757823636, + 1755302590, + 1756664155 + ], + "samples_ts": [ + 72.8173, + 72.9219, + 72.8654 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T11:29:15Z", + "avg_ns": 52059580331, + "stddev_ns": 822595033, + "avg_ts": 9.836527, + "stddev_ts": 0.15587, + "samples_ns": [ + 52829965096, + 52155577124, + 51193198774 + ], + "samples_ts": [ + 9.69147, + 9.81678, + 10.0013 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 713 + }, + { + "timestamp_utc": "2025-12-09T11:33:00.937217+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:31:53Z\",\n \"avg_ns\": 7051597892,\n \"stddev_ns\": 3072229,\n \"avg_ts\": 72.607666,\n \"stddev_ts\": 0.031620,\n \"samples_ns\": [ 7054825030, 7048711246, 7051257401 ],\n \"samples_ts\": [ 72.5744, 72.6374, 72.6112 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:32:21Z\",\n \"avg_ns\": 13163435010,\n \"stddev_ns\": 74467810,\n \"avg_ts\": 9.724114,\n \"stddev_ts\": 0.055168,\n \"samples_ns\": [ 13078701320, 13218474383, 13193129328 ],\n \"samples_ts\": [ 9.7869, 9.68342, 9.70202 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T11:31:53Z", + "avg_ns": 7051597892, + "stddev_ns": 3072229, + "avg_ts": 72.607666, + "stddev_ts": 0.03162, + "samples_ns": [ + 7054825030, + 7048711246, + 7051257401 + ], + "samples_ts": [ + 72.5744, + 72.6374, + 72.6112 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T11:32:21Z", + "avg_ns": 13163435010, + "stddev_ns": 74467810, + "avg_ts": 9.724114, + "stddev_ts": 0.055168, + "samples_ns": [ + 13078701320, + 13218474383, + 13193129328 + ], + "samples_ts": [ + 9.7869, + 9.68342, + 9.70202 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 714 + }, + { + "timestamp_utc": "2025-12-09T11:36:09.064996+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:33:01Z\",\n \"avg_ns\": 7043573336,\n \"stddev_ns\": 3512732,\n \"avg_ts\": 72.690389,\n \"stddev_ts\": 0.036247,\n \"samples_ns\": [ 7047351480, 7040406175, 7042962353 ],\n \"samples_ts\": [ 72.6514, 72.7231, 72.6967 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:33:30Z\",\n \"avg_ns\": 52947628174,\n \"stddev_ns\": 608563264,\n \"avg_ts\": 9.670790,\n \"stddev_ts\": 0.111893,\n \"samples_ns\": [ 53281989703, 53315705215, 52245189606 ],\n \"samples_ts\": [ 9.60925, 9.60317, 9.79995 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T11:33:01Z", + "avg_ns": 7043573336, + "stddev_ns": 3512732, + "avg_ts": 72.690389, + "stddev_ts": 0.036247, + "samples_ns": [ + 7047351480, + 7040406175, + 7042962353 + ], + "samples_ts": [ + 72.6514, + 72.7231, + 72.6967 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T11:33:30Z", + "avg_ns": 52947628174, + "stddev_ns": 608563264, + "avg_ts": 9.67079, + "stddev_ts": 0.111893, + "samples_ns": [ + 53281989703, + 53315705215, + 52245189606 + ], + "samples_ts": [ + 9.60925, + 9.60317, + 9.79995 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 715 + }, + { + "timestamp_utc": "2025-12-09T11:36:55.219727+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:36:09Z\",\n \"avg_ns\": 1758172778,\n \"stddev_ns\": 1064741,\n \"avg_ts\": 72.802874,\n \"stddev_ts\": 0.044046,\n \"samples_ns\": [ 1759334853, 1757246628, 1757936854 ],\n \"samples_ts\": [ 72.7548, 72.8412, 72.8126 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:36:16Z\",\n \"avg_ns\": 12680027422,\n \"stddev_ns\": 14121531,\n \"avg_ts\": 10.094624,\n \"stddev_ts\": 0.011249,\n \"samples_ns\": [ 12689478456, 12663794802, 12686809009 ],\n \"samples_ts\": [ 10.0871, 10.1076, 10.0892 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T11:36:09Z", + "avg_ns": 1758172778, + "stddev_ns": 1064741, + "avg_ts": 72.802874, + "stddev_ts": 0.044046, + "samples_ns": [ + 1759334853, + 1757246628, + 1757936854 + ], + "samples_ts": [ + 72.7548, + 72.8412, + 72.8126 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T11:36:16Z", + "avg_ns": 12680027422, + "stddev_ns": 14121531, + "avg_ts": 10.094624, + "stddev_ts": 0.011249, + "samples_ns": [ + 12689478456, + 12663794802, + 12686809009 + ], + "samples_ts": [ + 10.0871, + 10.1076, + 10.0892 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 716 + }, + { + "timestamp_utc": "2025-12-09T11:39:37.376667+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:36:56Z\",\n \"avg_ns\": 1753012068,\n \"stddev_ns\": 1167936,\n \"avg_ts\": 73.017203,\n \"stddev_ts\": 0.048638,\n \"samples_ns\": [ 1752784234, 1751974836, 1754277134 ],\n \"samples_ts\": [ 73.0267, 73.0604, 72.9645 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:37:03Z\",\n \"avg_ns\": 51339888895,\n \"stddev_ns\": 116856046,\n \"avg_ts\": 9.972787,\n \"stddev_ts\": 0.022671,\n \"samples_ns\": [ 51473850126, 51258905575, 51286910985 ],\n \"samples_ts\": [ 9.9468, 9.98851, 9.98305 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T11:36:56Z", + "avg_ns": 1753012068, + "stddev_ns": 1167936, + "avg_ts": 73.017203, + "stddev_ts": 0.048638, + "samples_ns": [ + 1752784234, + 1751974836, + 1754277134 + ], + "samples_ts": [ + 73.0267, + 73.0604, + 72.9645 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T11:37:03Z", + "avg_ns": 51339888895, + "stddev_ns": 116856046, + "avg_ts": 9.972787, + "stddev_ts": 0.022671, + "samples_ns": [ + 51473850126, + 51258905575, + 51286910985 + ], + "samples_ts": [ + 9.9468, + 9.98851, + 9.98305 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 717 + }, + { + "timestamp_utc": "2025-12-09T11:40:47.509198+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:39:38Z\",\n \"avg_ns\": 7350694791,\n \"stddev_ns\": 11634054,\n \"avg_ts\": 69.653396,\n \"stddev_ts\": 0.110139,\n \"samples_ns\": [ 7344518039, 7364114173, 7343452162 ],\n \"samples_ts\": [ 69.7119, 69.5264, 69.722 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:40:07Z\",\n \"avg_ns\": 13215382182,\n \"stddev_ns\": 65201268,\n \"avg_ts\": 9.685840,\n \"stddev_ts\": 0.047761,\n \"samples_ns\": [ 13152778212, 13210466271, 13282902065 ],\n \"samples_ts\": [ 9.73178, 9.68929, 9.63645 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T11:39:38Z", + "avg_ns": 7350694791, + "stddev_ns": 11634054, + "avg_ts": 69.653396, + "stddev_ts": 0.110139, + "samples_ns": [ + 7344518039, + 7364114173, + 7343452162 + ], + "samples_ts": [ + 69.7119, + 69.5264, + 69.722 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T11:40:07Z", + "avg_ns": 13215382182, + "stddev_ns": 65201268, + "avg_ts": 9.68584, + "stddev_ts": 0.047761, + "samples_ns": [ + 13152778212, + 13210466271, + 13282902065 + ], + "samples_ts": [ + 9.73178, + 9.68929, + 9.63645 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 718 + }, + { + "timestamp_utc": "2025-12-09T11:43:54.983602+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:40:48Z\",\n \"avg_ns\": 7351543765,\n \"stddev_ns\": 3319393,\n \"avg_ts\": 69.645245,\n \"stddev_ts\": 0.031440,\n \"samples_ns\": [ 7348873877, 7350497082, 7355260336 ],\n \"samples_ts\": [ 69.6705, 69.6552, 69.61 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 1B Q8_0\",\n \"model_size\": 1062773248,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:41:17Z\",\n \"avg_ns\": 52318347495,\n \"stddev_ns\": 983930251,\n \"avg_ts\": 9.788556,\n \"stddev_ts\": 0.184584,\n \"samples_ns\": [ 53255080452, 52406772865, 51293189170 ],\n \"samples_ts\": [ 9.61411, 9.76973, 9.98183 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T11:40:48Z", + "avg_ns": 7351543765, + "stddev_ns": 3319393, + "avg_ts": 69.645245, + "stddev_ts": 0.03144, + "samples_ns": [ + 7348873877, + 7350497082, + 7355260336 + ], + "samples_ts": [ + 69.6705, + 69.6552, + 69.61 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_type": "gemma3 1B Q8_0", + "model_size": 1062773248, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T11:41:17Z", + "avg_ns": 52318347495, + "stddev_ns": 983930251, + "avg_ts": 9.788556, + "stddev_ts": 0.184584, + "samples_ns": [ + 53255080452, + 52406772865, + 51293189170 + ], + "samples_ts": [ + 9.61411, + 9.76973, + 9.98183 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 719 + }, + { + "timestamp_utc": "2025-12-09T11:45:06.659897+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:44:03Z\",\n \"avg_ns\": 6346378053,\n \"stddev_ns\": 13945955,\n \"avg_ts\": 20.169049,\n \"stddev_ts\": 0.044352,\n \"samples_ns\": [ 6358367139, 6349693363, 6331073659 ],\n \"samples_ts\": [ 20.131, 20.1585, 20.2177 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:44:29Z\",\n \"avg_ns\": 12244123042,\n \"stddev_ns\": 19521728,\n \"avg_ts\": 10.454013,\n \"stddev_ts\": 0.016668,\n \"samples_ns\": [ 12263454178, 12244498803, 12224416145 ],\n \"samples_ts\": [ 10.4375, 10.4537, 10.4708 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T11:44:03Z", + "avg_ns": 6346378053, + "stddev_ns": 13945955, + "avg_ts": 20.169049, + "stddev_ts": 0.044352, + "samples_ns": [ + 6358367139, + 6349693363, + 6331073659 + ], + "samples_ts": [ + 20.131, + 20.1585, + 20.2177 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T11:44:29Z", + "avg_ns": 12244123042, + "stddev_ns": 19521728, + "avg_ts": 10.454013, + "stddev_ts": 0.016668, + "samples_ns": [ + 12263454178, + 12244498803, + 12224416145 + ], + "samples_ts": [ + 10.4375, + 10.4537, + 10.4708 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 720 + }, + { + "timestamp_utc": "2025-12-09T11:48:02.209748+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:45:07Z\",\n \"avg_ns\": 6331826699,\n \"stddev_ns\": 4810396,\n \"avg_ts\": 20.215343,\n \"stddev_ts\": 0.015362,\n \"samples_ns\": [ 6333965062, 6335196452, 6326318584 ],\n \"samples_ts\": [ 20.2085, 20.2046, 20.2329 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:45:33Z\",\n \"avg_ns\": 49662778311,\n \"stddev_ns\": 3104579,\n \"avg_ts\": 10.309532,\n \"stddev_ts\": 0.000641,\n \"samples_ns\": [ 49664693724, 49659215325, 49664425886 ],\n \"samples_ts\": [ 10.3091, 10.3103, 10.3092 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T11:45:07Z", + "avg_ns": 6331826699, + "stddev_ns": 4810396, + "avg_ts": 20.215343, + "stddev_ts": 0.015362, + "samples_ns": [ + 6333965062, + 6335196452, + 6326318584 + ], + "samples_ts": [ + 20.2085, + 20.2046, + 20.2329 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T11:45:33Z", + "avg_ns": 49662778311, + "stddev_ns": 3104579, + "avg_ts": 10.309532, + "stddev_ts": 0.000641, + "samples_ns": [ + 49664693724, + 49659215325, + 49664425886 + ], + "samples_ts": [ + 10.3091, + 10.3103, + 10.3092 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 721 + }, + { + "timestamp_utc": "2025-12-09T11:50:22.090604+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:48:03Z\",\n \"avg_ns\": 25544015100,\n \"stddev_ns\": 658879,\n \"avg_ts\": 20.043834,\n \"stddev_ts\": 0.000502,\n \"samples_ns\": [ 25544738562, 25543779983, 25543526756 ],\n \"samples_ts\": [ 20.0433, 20.044, 20.0442 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:49:45Z\",\n \"avg_ns\": 12176544387,\n \"stddev_ns\": 1586433,\n \"avg_ts\": 10.512014,\n \"stddev_ts\": 0.001363,\n \"samples_ns\": [ 12175160575, 12176208570, 12178264018 ],\n \"samples_ts\": [ 10.5132, 10.5123, 10.5105 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T11:48:03Z", + "avg_ns": 25544015100, + "stddev_ns": 658879, + "avg_ts": 20.043834, + "stddev_ts": 0.000502, + "samples_ns": [ + 25544738562, + 25543779983, + 25543526756 + ], + "samples_ts": [ + 20.0433, + 20.044, + 20.0442 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T11:49:45Z", + "avg_ns": 12176544387, + "stddev_ns": 1586433, + "avg_ts": 10.512014, + "stddev_ts": 0.001363, + "samples_ns": [ + 12175160575, + 12176208570, + 12178264018 + ], + "samples_ts": [ + 10.5132, + 10.5123, + 10.5105 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 722 + }, + { + "timestamp_utc": "2025-12-09T11:54:33.230675+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:50:23Z\",\n \"avg_ns\": 25543222025,\n \"stddev_ns\": 619979,\n \"avg_ts\": 20.044456,\n \"stddev_ts\": 0.000453,\n \"samples_ns\": [ 25543620284, 25543485855, 25542559938 ],\n \"samples_ts\": [ 20.0441, 20.0442, 20.045 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:52:05Z\",\n \"avg_ns\": 49272009652,\n \"stddev_ns\": 3175031,\n \"avg_ts\": 10.391295,\n \"stddev_ts\": 0.000666,\n \"samples_ns\": [ 49275653728, 49270338678, 49270036552 ],\n \"samples_ts\": [ 10.3905, 10.3916, 10.3917 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T11:50:23Z", + "avg_ns": 25543222025, + "stddev_ns": 619979, + "avg_ts": 20.044456, + "stddev_ts": 0.000453, + "samples_ns": [ + 25543620284, + 25543485855, + 25542559938 + ], + "samples_ts": [ + 20.0441, + 20.0442, + 20.045 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T11:52:05Z", + "avg_ns": 49272009652, + "stddev_ns": 3175031, + "avg_ts": 10.391295, + "stddev_ts": 0.000666, + "samples_ns": [ + 49275653728, + 49270338678, + 49270036552 + ], + "samples_ts": [ + 10.3905, + 10.3916, + 10.3917 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 723 + }, + { + "timestamp_utc": "2025-12-09T11:55:36.182631+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:54:34Z\",\n \"avg_ns\": 6327770427,\n \"stddev_ns\": 191981,\n \"avg_ts\": 20.228294,\n \"stddev_ts\": 0.000559,\n \"samples_ns\": [ 6327583392, 6327798428, 6327929462 ],\n \"samples_ts\": [ 20.2289, 20.2282, 20.2278 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:54:59Z\",\n \"avg_ns\": 12163340138,\n \"stddev_ns\": 1391301,\n \"avg_ts\": 10.523425,\n \"stddev_ts\": 0.001200,\n \"samples_ns\": [ 12163948184, 12164319185, 12161753046 ],\n \"samples_ts\": [ 10.5229, 10.5226, 10.5248 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T11:54:34Z", + "avg_ns": 6327770427, + "stddev_ns": 191981, + "avg_ts": 20.228294, + "stddev_ts": 0.000559, + "samples_ns": [ + 6327583392, + 6327798428, + 6327929462 + ], + "samples_ts": [ + 20.2289, + 20.2282, + 20.2278 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T11:54:59Z", + "avg_ns": 12163340138, + "stddev_ns": 1391301, + "avg_ts": 10.523425, + "stddev_ts": 0.0012, + "samples_ns": [ + 12163948184, + 12164319185, + 12161753046 + ], + "samples_ts": [ + 10.5229, + 10.5226, + 10.5248 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 724 + }, + { + "timestamp_utc": "2025-12-09T11:58:31.868082+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:55:37Z\",\n \"avg_ns\": 6327754911,\n \"stddev_ns\": 110503,\n \"avg_ts\": 20.228344,\n \"stddev_ts\": 0.000245,\n \"samples_ns\": [ 6327843368, 6327706825, 6327714541 ],\n \"samples_ts\": [ 20.2281, 20.2285, 20.2285 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:56:02Z\",\n \"avg_ns\": 49741928123,\n \"stddev_ns\": 490280,\n \"avg_ts\": 10.293127,\n \"stddev_ts\": 0.000101,\n \"samples_ns\": [ 49742480688, 49741758514, 49741545167 ],\n \"samples_ts\": [ 10.293, 10.2932, 10.2932 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T11:55:37Z", + "avg_ns": 6327754911, + "stddev_ns": 110503, + "avg_ts": 20.228344, + "stddev_ts": 0.000245, + "samples_ns": [ + 6327843368, + 6327706825, + 6327714541 + ], + "samples_ts": [ + 20.2281, + 20.2285, + 20.2285 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T11:56:02Z", + "avg_ns": 49741928123, + "stddev_ns": 490280, + "avg_ts": 10.293127, + "stddev_ts": 0.000101, + "samples_ns": [ + 49742480688, + 49741758514, + 49741545167 + ], + "samples_ts": [ + 10.293, + 10.2932, + 10.2932 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 725 + }, + { + "timestamp_utc": "2025-12-09T12:00:51.978038+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T11:58:32Z\",\n \"avg_ns\": 25580859037,\n \"stddev_ns\": 716685,\n \"avg_ts\": 20.014965,\n \"stddev_ts\": 0.000561,\n \"samples_ns\": [ 25580960552, 25581519552, 25580097007 ],\n \"samples_ts\": [ 20.0149, 20.0144, 20.0156 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T12:00:15Z\",\n \"avg_ns\": 12210325221,\n \"stddev_ns\": 2100318,\n \"avg_ts\": 10.482931,\n \"stddev_ts\": 0.001798,\n \"samples_ns\": [ 12212003122, 12210994716, 12207977827 ],\n \"samples_ts\": [ 10.4815, 10.4824, 10.4849 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T11:58:32Z", + "avg_ns": 25580859037, + "stddev_ns": 716685, + "avg_ts": 20.014965, + "stddev_ts": 0.000561, + "samples_ns": [ + 25580960552, + 25581519552, + 25580097007 + ], + "samples_ts": [ + 20.0149, + 20.0144, + 20.0156 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T12:00:15Z", + "avg_ns": 12210325221, + "stddev_ns": 2100318, + "avg_ts": 10.482931, + "stddev_ts": 0.001798, + "samples_ns": [ + 12212003122, + 12210994716, + 12207977827 + ], + "samples_ts": [ + 10.4815, + 10.4824, + 10.4849 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 726 + }, + { + "timestamp_utc": "2025-12-09T12:05:02.869931+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T12:00:52Z\",\n \"avg_ns\": 25574412178,\n \"stddev_ns\": 462044,\n \"avg_ts\": 20.020010,\n \"stddev_ts\": 0.000362,\n \"samples_ns\": [ 25574596506, 25573886421, 25574753607 ],\n \"samples_ts\": [ 20.0199, 20.0204, 20.0197 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T12:02:35Z\",\n \"avg_ns\": 49139839282,\n \"stddev_ns\": 1951498,\n \"avg_ts\": 10.419245,\n \"stddev_ts\": 0.000414,\n \"samples_ns\": [ 49141822625, 49139773952, 49137921269 ],\n \"samples_ts\": [ 10.4188, 10.4193, 10.4197 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T12:00:52Z", + "avg_ns": 25574412178, + "stddev_ns": 462044, + "avg_ts": 20.02001, + "stddev_ts": 0.000362, + "samples_ns": [ + 25574596506, + 25573886421, + 25574753607 + ], + "samples_ts": [ + 20.0199, + 20.0204, + 20.0197 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T12:02:35Z", + "avg_ns": 49139839282, + "stddev_ns": 1951498, + "avg_ts": 10.419245, + "stddev_ts": 0.000414, + "samples_ns": [ + 49141822625, + 49139773952, + 49137921269 + ], + "samples_ts": [ + 10.4188, + 10.4193, + 10.4197 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 727 + }, + { + "timestamp_utc": "2025-12-09T12:06:05.921909+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T12:05:03Z\",\n \"avg_ns\": 6327509304,\n \"stddev_ns\": 491712,\n \"avg_ts\": 20.229129,\n \"stddev_ts\": 0.001572,\n \"samples_ns\": [ 6327170377, 6327284271, 6328073264 ],\n \"samples_ts\": [ 20.2302, 20.2298, 20.2273 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T12:05:29Z\",\n \"avg_ns\": 12189598281,\n \"stddev_ns\": 2753148,\n \"avg_ts\": 10.500757,\n \"stddev_ts\": 0.002372,\n \"samples_ns\": [ 12192417699, 12186916571, 12189460573 ],\n \"samples_ts\": [ 10.4983, 10.5031, 10.5009 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T12:05:03Z", + "avg_ns": 6327509304, + "stddev_ns": 491712, + "avg_ts": 20.229129, + "stddev_ts": 0.001572, + "samples_ns": [ + 6327170377, + 6327284271, + 6328073264 + ], + "samples_ts": [ + 20.2302, + 20.2298, + 20.2273 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T12:05:29Z", + "avg_ns": 12189598281, + "stddev_ns": 2753148, + "avg_ts": 10.500757, + "stddev_ts": 0.002372, + "samples_ns": [ + 12192417699, + 12186916571, + 12189460573 + ], + "samples_ts": [ + 10.4983, + 10.5031, + 10.5009 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 728 + }, + { + "timestamp_utc": "2025-12-09T12:09:00.094783+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T12:06:06Z\",\n \"avg_ns\": 6328959606,\n \"stddev_ns\": 275609,\n \"avg_ts\": 20.224493,\n \"stddev_ts\": 0.000881,\n \"samples_ns\": [ 6328750878, 6328855920, 6329272020 ],\n \"samples_ts\": [ 20.2252, 20.2248, 20.2235 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T12:06:32Z\",\n \"avg_ns\": 49227996245,\n \"stddev_ns\": 1757254,\n \"avg_ts\": 10.400586,\n \"stddev_ts\": 0.000365,\n \"samples_ns\": [ 49229603882, 49228217667, 49226167188 ],\n \"samples_ts\": [ 10.4002, 10.4005, 10.401 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T12:06:06Z", + "avg_ns": 6328959606, + "stddev_ns": 275609, + "avg_ts": 20.224493, + "stddev_ts": 0.000881, + "samples_ns": [ + 6328750878, + 6328855920, + 6329272020 + ], + "samples_ts": [ + 20.2252, + 20.2248, + 20.2235 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T12:06:32Z", + "avg_ns": 49227996245, + "stddev_ns": 1757254, + "avg_ts": 10.400586, + "stddev_ts": 0.000365, + "samples_ns": [ + 49229603882, + 49228217667, + 49226167188 + ], + "samples_ts": [ + 10.4002, + 10.4005, + 10.401 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 729 + }, + { + "timestamp_utc": "2025-12-09T12:11:21.597220+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T12:09:01Z\",\n \"avg_ns\": 25958183407,\n \"stddev_ns\": 682599,\n \"avg_ts\": 19.724030,\n \"stddev_ts\": 0.000519,\n \"samples_ns\": [ 25957563020, 25958914649, 25958072552 ],\n \"samples_ts\": [ 19.7245, 19.7235, 19.7241 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T12:10:44Z\",\n \"avg_ns\": 12153636676,\n \"stddev_ns\": 3233643,\n \"avg_ts\": 10.531828,\n \"stddev_ts\": 0.002799,\n \"samples_ns\": [ 12156077086, 12154858898, 12149974046 ],\n \"samples_ts\": [ 10.5297, 10.5308, 10.535 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T12:09:01Z", + "avg_ns": 25958183407, + "stddev_ns": 682599, + "avg_ts": 19.72403, + "stddev_ts": 0.000519, + "samples_ns": [ + 25957563020, + 25958914649, + 25958072552 + ], + "samples_ts": [ + 19.7245, + 19.7235, + 19.7241 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T12:10:44Z", + "avg_ns": 12153636676, + "stddev_ns": 3233643, + "avg_ts": 10.531828, + "stddev_ts": 0.002799, + "samples_ns": [ + 12156077086, + 12154858898, + 12149974046 + ], + "samples_ts": [ + 10.5297, + 10.5308, + 10.535 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 730 + }, + { + "timestamp_utc": "2025-12-09T12:15:34.171505+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T12:11:22Z\",\n \"avg_ns\": 25946500206,\n \"stddev_ns\": 335753,\n \"avg_ts\": 19.732912,\n \"stddev_ts\": 0.000255,\n \"samples_ns\": [ 25946798331, 25946565787, 25946136500 ],\n \"samples_ts\": [ 19.7327, 19.7329, 19.7332 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T12:13:06Z\",\n \"avg_ns\": 49190482670,\n \"stddev_ns\": 3281533,\n \"avg_ts\": 10.408518,\n \"stddev_ts\": 0.000693,\n \"samples_ns\": [ 49194204390, 49188046595, 49189197026 ],\n \"samples_ts\": [ 10.4077, 10.409, 10.4088 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T12:11:22Z", + "avg_ns": 25946500206, + "stddev_ns": 335753, + "avg_ts": 19.732912, + "stddev_ts": 0.000255, + "samples_ns": [ + 25946798331, + 25946565787, + 25946136500 + ], + "samples_ts": [ + 19.7327, + 19.7329, + 19.7332 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T12:13:06Z", + "avg_ns": 49190482670, + "stddev_ns": 3281533, + "avg_ts": 10.408518, + "stddev_ts": 0.000693, + "samples_ns": [ + 49194204390, + 49188046595, + 49189197026 + ], + "samples_ts": [ + 10.4077, + 10.409, + 10.4088 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 731 + }, + { + "timestamp_utc": "2025-12-09T12:16:37.168737+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T12:15:35Z\",\n \"avg_ns\": 6326101583,\n \"stddev_ns\": 192700,\n \"avg_ts\": 20.233630,\n \"stddev_ts\": 0.000616,\n \"samples_ns\": [ 6325884950, 6326165898, 6326253901 ],\n \"samples_ts\": [ 20.2343, 20.2334, 20.2331 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T12:16:00Z\",\n \"avg_ns\": 12180722366,\n \"stddev_ns\": 1511458,\n \"avg_ts\": 10.508408,\n \"stddev_ts\": 0.001300,\n \"samples_ns\": [ 12180055251, 12182448240, 12179663608 ],\n \"samples_ts\": [ 10.509, 10.5069, 10.5093 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T12:15:35Z", + "avg_ns": 6326101583, + "stddev_ns": 192700, + "avg_ts": 20.23363, + "stddev_ts": 0.000616, + "samples_ns": [ + 6325884950, + 6326165898, + 6326253901 + ], + "samples_ts": [ + 20.2343, + 20.2334, + 20.2331 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T12:16:00Z", + "avg_ns": 12180722366, + "stddev_ns": 1511458, + "avg_ts": 10.508408, + "stddev_ts": 0.0013, + "samples_ns": [ + 12180055251, + 12182448240, + 12179663608 + ], + "samples_ts": [ + 10.509, + 10.5069, + 10.5093 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 732 + }, + { + "timestamp_utc": "2025-12-09T12:19:31.386251+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T12:16:38Z\",\n \"avg_ns\": 6326929423,\n \"stddev_ns\": 615500,\n \"avg_ts\": 20.230983,\n \"stddev_ts\": 0.001935,\n \"samples_ns\": [ 6326743081, 6326439376, 6327605814 ],\n \"samples_ts\": [ 20.2316, 20.2325, 20.2288 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T12:17:03Z\",\n \"avg_ns\": 49251587235,\n \"stddev_ns\": 1446171,\n \"avg_ts\": 10.395604,\n \"stddev_ts\": 0.000298,\n \"samples_ns\": [ 49250203097, 49251533632, 49253024978 ],\n \"samples_ts\": [ 10.3959, 10.3956, 10.3953 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T12:16:38Z", + "avg_ns": 6326929423, + "stddev_ns": 615500, + "avg_ts": 20.230983, + "stddev_ts": 0.001935, + "samples_ns": [ + 6326743081, + 6326439376, + 6327605814 + ], + "samples_ts": [ + 20.2316, + 20.2325, + 20.2288 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T12:17:03Z", + "avg_ns": 49251587235, + "stddev_ns": 1446171, + "avg_ts": 10.395604, + "stddev_ts": 0.000298, + "samples_ns": [ + 49250203097, + 49251533632, + 49253024978 + ], + "samples_ts": [ + 10.3959, + 10.3956, + 10.3953 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 733 + }, + { + "timestamp_utc": "2025-12-09T12:21:51.397534+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T12:19:32Z\",\n \"avg_ns\": 25553135188,\n \"stddev_ns\": 337319,\n \"avg_ts\": 20.036680,\n \"stddev_ts\": 0.000233,\n \"samples_ns\": [ 25553131428, 25553434088, 25552840049 ],\n \"samples_ts\": [ 20.0367, 20.0364, 20.0369 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T12:21:14Z\",\n \"avg_ns\": 12214906238,\n \"stddev_ns\": 1718601,\n \"avg_ts\": 10.479000,\n \"stddev_ts\": 0.001468,\n \"samples_ns\": [ 12216875539, 12214064956, 12213778221 ],\n \"samples_ts\": [ 10.4773, 10.4797, 10.48 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T12:19:32Z", + "avg_ns": 25553135188, + "stddev_ns": 337319, + "avg_ts": 20.03668, + "stddev_ts": 0.000233, + "samples_ns": [ + 25553131428, + 25553434088, + 25552840049 + ], + "samples_ts": [ + 20.0367, + 20.0364, + 20.0369 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T12:21:14Z", + "avg_ns": 12214906238, + "stddev_ns": 1718601, + "avg_ts": 10.479, + "stddev_ts": 0.001468, + "samples_ns": [ + 12216875539, + 12214064956, + 12213778221 + ], + "samples_ts": [ + 10.4773, + 10.4797, + 10.48 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 734 + }, + { + "timestamp_utc": "2025-12-09T12:26:02.689409+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T12:21:52Z\",\n \"avg_ns\": 25542318121,\n \"stddev_ns\": 639979,\n \"avg_ts\": 20.045166,\n \"stddev_ts\": 0.000486,\n \"samples_ns\": [ 25542925493, 25542342089, 25541686782 ],\n \"samples_ts\": [ 20.0447, 20.0451, 20.0457 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T12:23:34Z\",\n \"avg_ns\": 49322107534,\n \"stddev_ns\": 1509477,\n \"avg_ts\": 10.380741,\n \"stddev_ts\": 0.000311,\n \"samples_ns\": [ 49323776626, 49320972192, 49321573786 ],\n \"samples_ts\": [ 10.3804, 10.381, 10.3809 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T12:21:52Z", + "avg_ns": 25542318121, + "stddev_ns": 639979, + "avg_ts": 20.045166, + "stddev_ts": 0.000486, + "samples_ns": [ + 25542925493, + 25542342089, + 25541686782 + ], + "samples_ts": [ + 20.0447, + 20.0451, + 20.0457 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T12:23:34Z", + "avg_ns": 49322107534, + "stddev_ns": 1509477, + "avg_ts": 10.380741, + "stddev_ts": 0.000311, + "samples_ns": [ + 49323776626, + 49320972192, + 49321573786 + ], + "samples_ts": [ + 10.3804, + 10.381, + 10.3809 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 735 + }, + { + "timestamp_utc": "2025-12-09T12:27:05.885381+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T12:26:03Z\",\n \"avg_ns\": 6327228557,\n \"stddev_ns\": 183735,\n \"avg_ts\": 20.230026,\n \"stddev_ts\": 0.000530,\n \"samples_ns\": [ 6327183950, 6327411917, 6327089805 ],\n \"samples_ts\": [ 20.2302, 20.2294, 20.2305 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T12:26:28Z\",\n \"avg_ns\": 12243926320,\n \"stddev_ns\": 924521,\n \"avg_ts\": 10.454163,\n \"stddev_ts\": 0.000789,\n \"samples_ns\": [ 12244629591, 12244270240, 12242879129 ],\n \"samples_ts\": [ 10.4536, 10.4539, 10.4551 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T12:26:03Z", + "avg_ns": 6327228557, + "stddev_ns": 183735, + "avg_ts": 20.230026, + "stddev_ts": 0.00053, + "samples_ns": [ + 6327183950, + 6327411917, + 6327089805 + ], + "samples_ts": [ + 20.2302, + 20.2294, + 20.2305 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T12:26:28Z", + "avg_ns": 12243926320, + "stddev_ns": 924521, + "avg_ts": 10.454163, + "stddev_ts": 0.000789, + "samples_ns": [ + 12244629591, + 12244270240, + 12242879129 + ], + "samples_ts": [ + 10.4536, + 10.4539, + 10.4551 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 736 + }, + { + "timestamp_utc": "2025-12-09T12:29:59.955685+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T12:27:06Z\",\n \"avg_ns\": 6327314389,\n \"stddev_ns\": 208353,\n \"avg_ts\": 20.229752,\n \"stddev_ts\": 0.000666,\n \"samples_ns\": [ 6327160531, 6327551496, 6327231140 ],\n \"samples_ts\": [ 20.2302, 20.229, 20.23 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T12:27:32Z\",\n \"avg_ns\": 49202243663,\n \"stddev_ns\": 1149195,\n \"avg_ts\": 10.406030,\n \"stddev_ts\": 0.000234,\n \"samples_ns\": [ 49203092655, 49200993544, 49202644792 ],\n \"samples_ts\": [ 10.4058, 10.4063, 10.4059 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T12:27:06Z", + "avg_ns": 6327314389, + "stddev_ns": 208353, + "avg_ts": 20.229752, + "stddev_ts": 0.000666, + "samples_ns": [ + 6327160531, + 6327551496, + 6327231140 + ], + "samples_ts": [ + 20.2302, + 20.229, + 20.23 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T12:27:32Z", + "avg_ns": 49202243663, + "stddev_ns": 1149195, + "avg_ts": 10.40603, + "stddev_ts": 0.000234, + "samples_ns": [ + 49203092655, + 49200993544, + 49202644792 + ], + "samples_ts": [ + 10.4058, + 10.4063, + 10.4059 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 737 + }, + { + "timestamp_utc": "2025-12-09T12:32:19.873914+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T12:30:00Z\",\n \"avg_ns\": 25582357121,\n \"stddev_ns\": 1165875,\n \"avg_ts\": 20.013793,\n \"stddev_ts\": 0.000895,\n \"samples_ns\": [ 25583661823, 25581882022, 25581527520 ],\n \"samples_ts\": [ 20.0128, 20.0142, 20.0144 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T12:31:43Z\",\n \"avg_ns\": 12136425495,\n \"stddev_ns\": 3544996,\n \"avg_ts\": 10.546763,\n \"stddev_ts\": 0.003081,\n \"samples_ns\": [ 12139636055, 12137019357, 12132621073 ],\n \"samples_ts\": [ 10.544, 10.5462, 10.5501 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T12:30:00Z", + "avg_ns": 25582357121, + "stddev_ns": 1165875, + "avg_ts": 20.013793, + "stddev_ts": 0.000895, + "samples_ns": [ + 25583661823, + 25581882022, + 25581527520 + ], + "samples_ts": [ + 20.0128, + 20.0142, + 20.0144 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T12:31:43Z", + "avg_ns": 12136425495, + "stddev_ns": 3544996, + "avg_ts": 10.546763, + "stddev_ts": 0.003081, + "samples_ns": [ + 12139636055, + 12137019357, + 12132621073 + ], + "samples_ts": [ + 10.544, + 10.5462, + 10.5501 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 738 + }, + { + "timestamp_utc": "2025-12-09T12:36:31.649599+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T12:32:20Z\",\n \"avg_ns\": 25582014244,\n \"stddev_ns\": 6859012,\n \"avg_ts\": 20.014062,\n \"stddev_ts\": 0.005365,\n \"samples_ns\": [ 25589917849, 25578504963, 25577619920 ],\n \"samples_ts\": [ 20.0079, 20.0168, 20.0175 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T12:34:03Z\",\n \"avg_ns\": 49333262577,\n \"stddev_ns\": 757244,\n \"avg_ts\": 10.378393,\n \"stddev_ts\": 0.000159,\n \"samples_ns\": [ 49333129002, 49334077721, 49332581008 ],\n \"samples_ts\": [ 10.3784, 10.3782, 10.3785 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T12:32:20Z", + "avg_ns": 25582014244, + "stddev_ns": 6859012, + "avg_ts": 20.014062, + "stddev_ts": 0.005365, + "samples_ns": [ + 25589917849, + 25578504963, + 25577619920 + ], + "samples_ts": [ + 20.0079, + 20.0168, + 20.0175 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T12:34:03Z", + "avg_ns": 49333262577, + "stddev_ns": 757244, + "avg_ts": 10.378393, + "stddev_ts": 0.000159, + "samples_ns": [ + 49333129002, + 49334077721, + 49332581008 + ], + "samples_ts": [ + 10.3784, + 10.3782, + 10.3785 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 739 + }, + { + "timestamp_utc": "2025-12-09T12:37:34.782969+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T12:36:32Z\",\n \"avg_ns\": 6332009653,\n \"stddev_ns\": 5247323,\n \"avg_ts\": 20.214760,\n \"stddev_ts\": 0.016742,\n \"samples_ns\": [ 6338066945, 6329080907, 6328881108 ],\n \"samples_ts\": [ 20.1954, 20.2241, 20.2247 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T12:36:57Z\",\n \"avg_ns\": 12214097262,\n \"stddev_ns\": 923312,\n \"avg_ts\": 10.479694,\n \"stddev_ts\": 0.000786,\n \"samples_ns\": [ 12215116782, 12213833931, 12213341074 ],\n \"samples_ts\": [ 10.4788, 10.4799, 10.4803 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T12:36:32Z", + "avg_ns": 6332009653, + "stddev_ns": 5247323, + "avg_ts": 20.21476, + "stddev_ts": 0.016742, + "samples_ns": [ + 6338066945, + 6329080907, + 6328881108 + ], + "samples_ts": [ + 20.1954, + 20.2241, + 20.2247 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T12:36:57Z", + "avg_ns": 12214097262, + "stddev_ns": 923312, + "avg_ts": 10.479694, + "stddev_ts": 0.000786, + "samples_ns": [ + 12215116782, + 12213833931, + 12213341074 + ], + "samples_ts": [ + 10.4788, + 10.4799, + 10.4803 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 740 + }, + { + "timestamp_utc": "2025-12-09T12:40:29.567240+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T12:37:35Z\",\n \"avg_ns\": 6327375934,\n \"stddev_ns\": 1654180,\n \"avg_ts\": 20.229556,\n \"stddev_ts\": 0.005282,\n \"samples_ns\": [ 6329263240, 6326190323, 6326674240 ],\n \"samples_ts\": [ 20.2235, 20.2333, 20.2318 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T12:38:01Z\",\n \"avg_ns\": 49413854190,\n \"stddev_ns\": 2281330,\n \"avg_ts\": 10.361467,\n \"stddev_ts\": 0.000476,\n \"samples_ns\": [ 49414215267, 49415922490, 49411424814 ],\n \"samples_ts\": [ 10.3614, 10.361, 10.362 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T12:37:35Z", + "avg_ns": 6327375934, + "stddev_ns": 1654180, + "avg_ts": 20.229556, + "stddev_ts": 0.005282, + "samples_ns": [ + 6329263240, + 6326190323, + 6326674240 + ], + "samples_ts": [ + 20.2235, + 20.2333, + 20.2318 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T12:38:01Z", + "avg_ns": 49413854190, + "stddev_ns": 2281330, + "avg_ts": 10.361467, + "stddev_ts": 0.000476, + "samples_ns": [ + 49414215267, + 49415922490, + 49411424814 + ], + "samples_ts": [ + 10.3614, + 10.361, + 10.362 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 741 + }, + { + "timestamp_utc": "2025-12-09T12:42:51.246054+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T12:40:30Z\",\n \"avg_ns\": 25952712266,\n \"stddev_ns\": 1222612,\n \"avg_ts\": 19.728189,\n \"stddev_ts\": 0.000929,\n \"samples_ns\": [ 25954071216, 25952364034, 25951701548 ],\n \"samples_ts\": [ 19.7272, 19.7285, 19.729 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T12:42:14Z\",\n \"avg_ns\": 12229105127,\n \"stddev_ns\": 4694391,\n \"avg_ts\": 10.466834,\n \"stddev_ts\": 0.004015,\n \"samples_ns\": [ 12226099786, 12226704109, 12234511488 ],\n \"samples_ts\": [ 10.4694, 10.4689, 10.4622 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T12:40:30Z", + "avg_ns": 25952712266, + "stddev_ns": 1222612, + "avg_ts": 19.728189, + "stddev_ts": 0.000929, + "samples_ns": [ + 25954071216, + 25952364034, + 25951701548 + ], + "samples_ts": [ + 19.7272, + 19.7285, + 19.729 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T12:42:14Z", + "avg_ns": 12229105127, + "stddev_ns": 4694391, + "avg_ts": 10.466834, + "stddev_ts": 0.004015, + "samples_ns": [ + 12226099786, + 12226704109, + 12234511488 + ], + "samples_ts": [ + 10.4694, + 10.4689, + 10.4622 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 742 + }, + { + "timestamp_utc": "2025-12-09T12:47:03.908522+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T12:42:52Z\",\n \"avg_ns\": 25944005653,\n \"stddev_ns\": 2842079,\n \"avg_ts\": 19.734809,\n \"stddev_ts\": 0.002158,\n \"samples_ns\": [ 25942667432, 25947264811, 25942084717 ],\n \"samples_ts\": [ 19.7358, 19.7323, 19.7363 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T12:44:36Z\",\n \"avg_ns\": 49242209277,\n \"stddev_ns\": 3646087,\n \"avg_ts\": 10.397584,\n \"stddev_ts\": 0.000767,\n \"samples_ns\": [ 49246378729, 49240521069, 49239728035 ],\n \"samples_ts\": [ 10.3967, 10.3979, 10.3981 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T12:42:52Z", + "avg_ns": 25944005653, + "stddev_ns": 2842079, + "avg_ts": 19.734809, + "stddev_ts": 0.002158, + "samples_ns": [ + 25942667432, + 25947264811, + 25942084717 + ], + "samples_ts": [ + 19.7358, + 19.7323, + 19.7363 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T12:44:36Z", + "avg_ns": 49242209277, + "stddev_ns": 3646087, + "avg_ts": 10.397584, + "stddev_ts": 0.000767, + "samples_ns": [ + 49246378729, + 49240521069, + 49239728035 + ], + "samples_ts": [ + 10.3967, + 10.3979, + 10.3981 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 743 + }, + { + "timestamp_utc": "2025-12-09T12:48:07.018263+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T12:47:04Z\",\n \"avg_ns\": 6327354988,\n \"stddev_ns\": 196890,\n \"avg_ts\": 20.229622,\n \"stddev_ts\": 0.000517,\n \"samples_ns\": [ 6327181331, 6327382711, 6327500924 ],\n \"samples_ts\": [ 20.2302, 20.2295, 20.2292 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T12:47:30Z\",\n \"avg_ns\": 12212581011,\n \"stddev_ns\": 3580353,\n \"avg_ts\": 10.480996,\n \"stddev_ts\": 0.003071,\n \"samples_ns\": [ 12216158597, 12209001303, 12212583134 ],\n \"samples_ts\": [ 10.4779, 10.4841, 10.481 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T12:47:04Z", + "avg_ns": 6327354988, + "stddev_ns": 196890, + "avg_ts": 20.229622, + "stddev_ts": 0.000517, + "samples_ns": [ + 6327181331, + 6327382711, + 6327500924 + ], + "samples_ts": [ + 20.2302, + 20.2295, + 20.2292 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T12:47:30Z", + "avg_ns": 12212581011, + "stddev_ns": 3580353, + "avg_ts": 10.480996, + "stddev_ts": 0.003071, + "samples_ns": [ + 12216158597, + 12209001303, + 12212583134 + ], + "samples_ts": [ + 10.4779, + 10.4841, + 10.481 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 744 + }, + { + "timestamp_utc": "2025-12-09T12:51:01.638367+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T12:48:07Z\",\n \"avg_ns\": 6326496032,\n \"stddev_ns\": 408002,\n \"avg_ts\": 20.232369,\n \"stddev_ts\": 0.001254,\n \"samples_ns\": [ 6326306183, 6326234894, 6326947021 ],\n \"samples_ts\": [ 20.233, 20.2332, 20.2309 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T12:48:33Z\",\n \"avg_ns\": 49384060030,\n \"stddev_ns\": 2585437,\n \"avg_ts\": 10.367718,\n \"stddev_ts\": 0.000543,\n \"samples_ns\": [ 49383559838, 49386859017, 49381761235 ],\n \"samples_ts\": [ 10.3678, 10.3671, 10.3682 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T12:48:07Z", + "avg_ns": 6326496032, + "stddev_ns": 408002, + "avg_ts": 20.232369, + "stddev_ts": 0.001254, + "samples_ns": [ + 6326306183, + 6326234894, + 6326947021 + ], + "samples_ts": [ + 20.233, + 20.2332, + 20.2309 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T12:48:33Z", + "avg_ns": 49384060030, + "stddev_ns": 2585437, + "avg_ts": 10.367718, + "stddev_ts": 0.000543, + "samples_ns": [ + 49383559838, + 49386859017, + 49381761235 + ], + "samples_ts": [ + 10.3678, + 10.3671, + 10.3682 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 745 + }, + { + "timestamp_utc": "2025-12-09T12:53:21.557566+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T12:51:02Z\",\n \"avg_ns\": 25542268719,\n \"stddev_ns\": 380751,\n \"avg_ts\": 20.045205,\n \"stddev_ts\": 0.000299,\n \"samples_ns\": [ 25542669356, 25542225211, 25541911590 ],\n \"samples_ts\": [ 20.0449, 20.0452, 20.0455 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T12:52:44Z\",\n \"avg_ns\": 12197704081,\n \"stddev_ns\": 17027534,\n \"avg_ts\": 10.493792,\n \"stddev_ts\": 0.014637,\n \"samples_ns\": [ 12187626652, 12217363705, 12188121886 ],\n \"samples_ts\": [ 10.5025, 10.4769, 10.502 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T12:51:02Z", + "avg_ns": 25542268719, + "stddev_ns": 380751, + "avg_ts": 20.045205, + "stddev_ts": 0.000299, + "samples_ns": [ + 25542669356, + 25542225211, + 25541911590 + ], + "samples_ts": [ + 20.0449, + 20.0452, + 20.0455 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T12:52:44Z", + "avg_ns": 12197704081, + "stddev_ns": 17027534, + "avg_ts": 10.493792, + "stddev_ts": 0.014637, + "samples_ns": [ + 12187626652, + 12217363705, + 12188121886 + ], + "samples_ts": [ + 10.5025, + 10.4769, + 10.502 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 746 + }, + { + "timestamp_utc": "2025-12-09T12:57:32.875889+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T12:53:22Z\",\n \"avg_ns\": 25541825277,\n \"stddev_ns\": 816020,\n \"avg_ts\": 20.045553,\n \"stddev_ts\": 0.000640,\n \"samples_ns\": [ 25542111830, 25540904630, 25542459371 ],\n \"samples_ts\": [ 20.0453, 20.0463, 20.0451 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T12:55:04Z\",\n \"avg_ns\": 49311863009,\n \"stddev_ns\": 2915712,\n \"avg_ts\": 10.382897,\n \"stddev_ts\": 0.000610,\n \"samples_ns\": [ 49315203219, 49310380000, 49310005810 ],\n \"samples_ts\": [ 10.3822, 10.3832, 10.3833 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T12:53:22Z", + "avg_ns": 25541825277, + "stddev_ns": 816020, + "avg_ts": 20.045553, + "stddev_ts": 0.00064, + "samples_ns": [ + 25542111830, + 25540904630, + 25542459371 + ], + "samples_ts": [ + 20.0453, + 20.0463, + 20.0451 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T12:55:04Z", + "avg_ns": 49311863009, + "stddev_ns": 2915712, + "avg_ts": 10.382897, + "stddev_ts": 0.00061, + "samples_ns": [ + 49315203219, + 49310380000, + 49310005810 + ], + "samples_ts": [ + 10.3822, + 10.3832, + 10.3833 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 747 + }, + { + "timestamp_utc": "2025-12-09T12:58:35.868252+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T12:57:33Z\",\n \"avg_ns\": 6326706819,\n \"stddev_ns\": 183100,\n \"avg_ts\": 20.231695,\n \"stddev_ts\": 0.000462,\n \"samples_ns\": [ 6326853254, 6326564393, 6326702812 ],\n \"samples_ts\": [ 20.2312, 20.2322, 20.2317 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T12:57:59Z\",\n \"avg_ns\": 12179088574,\n \"stddev_ns\": 8171541,\n \"avg_ts\": 10.509821,\n \"stddev_ts\": 0.007049,\n \"samples_ns\": [ 12188486028, 12175124683, 12173655011 ],\n \"samples_ts\": [ 10.5017, 10.5132, 10.5145 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T12:57:33Z", + "avg_ns": 6326706819, + "stddev_ns": 183100, + "avg_ts": 20.231695, + "stddev_ts": 0.000462, + "samples_ns": [ + 6326853254, + 6326564393, + 6326702812 + ], + "samples_ts": [ + 20.2312, + 20.2322, + 20.2317 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T12:57:59Z", + "avg_ns": 12179088574, + "stddev_ns": 8171541, + "avg_ts": 10.509821, + "stddev_ts": 0.007049, + "samples_ns": [ + 12188486028, + 12175124683, + 12173655011 + ], + "samples_ts": [ + 10.5017, + 10.5132, + 10.5145 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 748 + }, + { + "timestamp_utc": "2025-12-09T13:01:30.183197+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T12:58:36Z\",\n \"avg_ns\": 6326971286,\n \"stddev_ns\": 207064,\n \"avg_ts\": 20.230849,\n \"stddev_ts\": 0.000662,\n \"samples_ns\": [ 6327008625, 6327157140, 6326748093 ],\n \"samples_ts\": [ 20.2307, 20.2303, 20.2316 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T12:59:02Z\",\n \"avg_ns\": 49277981835,\n \"stddev_ns\": 7446511,\n \"avg_ts\": 10.390036,\n \"stddev_ts\": 0.001569,\n \"samples_ns\": [ 49286526981, 49274507144, 49272911381 ],\n \"samples_ts\": [ 10.3882, 10.3908, 10.3911 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T12:58:36Z", + "avg_ns": 6326971286, + "stddev_ns": 207064, + "avg_ts": 20.230849, + "stddev_ts": 0.000662, + "samples_ns": [ + 6327008625, + 6327157140, + 6326748093 + ], + "samples_ts": [ + 20.2307, + 20.2303, + 20.2316 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T12:59:02Z", + "avg_ns": 49277981835, + "stddev_ns": 7446511, + "avg_ts": 10.390036, + "stddev_ts": 0.001569, + "samples_ns": [ + 49286526981, + 49274507144, + 49272911381 + ], + "samples_ts": [ + 10.3882, + 10.3908, + 10.3911 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 749 + }, + { + "timestamp_utc": "2025-12-09T13:03:50.068676+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:01:31Z\",\n \"avg_ns\": 25581344597,\n \"stddev_ns\": 335188,\n \"avg_ts\": 20.014585,\n \"stddev_ts\": 0.000230,\n \"samples_ns\": [ 25581652441, 25581315952, 25581065399 ],\n \"samples_ts\": [ 20.0143, 20.0146, 20.0148 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:03:13Z\",\n \"avg_ns\": 12133439733,\n \"stddev_ns\": 2134813,\n \"avg_ts\": 10.549358,\n \"stddev_ts\": 0.001853,\n \"samples_ns\": [ 12132590442, 12135865460, 12131863298 ],\n \"samples_ts\": [ 10.5501, 10.5472, 10.5507 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T13:01:31Z", + "avg_ns": 25581344597, + "stddev_ns": 335188, + "avg_ts": 20.014585, + "stddev_ts": 0.00023, + "samples_ns": [ + 25581652441, + 25581315952, + 25581065399 + ], + "samples_ts": [ + 20.0143, + 20.0146, + 20.0148 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T13:03:13Z", + "avg_ns": 12133439733, + "stddev_ns": 2134813, + "avg_ts": 10.549358, + "stddev_ts": 0.001853, + "samples_ns": [ + 12132590442, + 12135865460, + 12131863298 + ], + "samples_ts": [ + 10.5501, + 10.5472, + 10.5507 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 750 + }, + { + "timestamp_utc": "2025-12-09T13:08:01.182315+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:03:51Z\",\n \"avg_ns\": 25573447402,\n \"stddev_ns\": 469420,\n \"avg_ts\": 20.020766,\n \"stddev_ts\": 0.000322,\n \"samples_ns\": [ 25573874725, 25573413339, 25573054144 ],\n \"samples_ts\": [ 20.0204, 20.0208, 20.0211 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:05:33Z\",\n \"avg_ns\": 49221326704,\n \"stddev_ns\": 3630689,\n \"avg_ts\": 10.401995,\n \"stddev_ts\": 0.000766,\n \"samples_ns\": [ 49222247316, 49217331283, 49224401514 ],\n \"samples_ts\": [ 10.4018, 10.4028, 10.4013 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T13:03:51Z", + "avg_ns": 25573447402, + "stddev_ns": 469420, + "avg_ts": 20.020766, + "stddev_ts": 0.000322, + "samples_ns": [ + 25573874725, + 25573413339, + 25573054144 + ], + "samples_ts": [ + 20.0204, + 20.0208, + 20.0211 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T13:05:33Z", + "avg_ns": 49221326704, + "stddev_ns": 3630689, + "avg_ts": 10.401995, + "stddev_ts": 0.000766, + "samples_ns": [ + 49222247316, + 49217331283, + 49224401514 + ], + "samples_ts": [ + 10.4018, + 10.4028, + 10.4013 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 751 + }, + { + "timestamp_utc": "2025-12-09T13:09:04.221687+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:08:02Z\",\n \"avg_ns\": 6329967918,\n \"stddev_ns\": 3424756,\n \"avg_ts\": 20.221275,\n \"stddev_ts\": 0.010931,\n \"samples_ns\": [ 6328055082, 6327928993, 6333919681 ],\n \"samples_ts\": [ 20.2274, 20.2278, 20.2087 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:08:27Z\",\n \"avg_ns\": 12189816928,\n \"stddev_ns\": 338385,\n \"avg_ts\": 10.500568,\n \"stddev_ts\": 0.000291,\n \"samples_ns\": [ 12190032717, 12189426932, 12189991135 ],\n \"samples_ts\": [ 10.5004, 10.5009, 10.5004 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T13:08:02Z", + "avg_ns": 6329967918, + "stddev_ns": 3424756, + "avg_ts": 20.221275, + "stddev_ts": 0.010931, + "samples_ns": [ + 6328055082, + 6327928993, + 6333919681 + ], + "samples_ts": [ + 20.2274, + 20.2278, + 20.2087 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T13:08:27Z", + "avg_ns": 12189816928, + "stddev_ns": 338385, + "avg_ts": 10.500568, + "stddev_ts": 0.000291, + "samples_ns": [ + 12190032717, + 12189426932, + 12189991135 + ], + "samples_ts": [ + 10.5004, + 10.5009, + 10.5004 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 752 + }, + { + "timestamp_utc": "2025-12-09T13:11:58.550667+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:09:05Z\",\n \"avg_ns\": 6330021280,\n \"stddev_ns\": 5343894,\n \"avg_ts\": 20.221111,\n \"stddev_ts\": 0.017063,\n \"samples_ns\": [ 6327108242, 6326766868, 6336188730 ],\n \"samples_ts\": [ 20.2304, 20.2315, 20.2014 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:09:30Z\",\n \"avg_ns\": 49287579501,\n \"stddev_ns\": 2725819,\n \"avg_ts\": 10.388013,\n \"stddev_ts\": 0.000573,\n \"samples_ns\": [ 49289637384, 49288601083, 49284500037 ],\n \"samples_ts\": [ 10.3876, 10.3878, 10.3887 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T13:09:05Z", + "avg_ns": 6330021280, + "stddev_ns": 5343894, + "avg_ts": 20.221111, + "stddev_ts": 0.017063, + "samples_ns": [ + 6327108242, + 6326766868, + 6336188730 + ], + "samples_ts": [ + 20.2304, + 20.2315, + 20.2014 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T13:09:30Z", + "avg_ns": 49287579501, + "stddev_ns": 2725819, + "avg_ts": 10.388013, + "stddev_ts": 0.000573, + "samples_ns": [ + 49289637384, + 49288601083, + 49284500037 + ], + "samples_ts": [ + 10.3876, + 10.3878, + 10.3887 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 753 + }, + { + "timestamp_utc": "2025-12-09T13:14:20.220866+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:11:59Z\",\n \"avg_ns\": 25951734015,\n \"stddev_ns\": 528042,\n \"avg_ts\": 19.728932,\n \"stddev_ts\": 0.000382,\n \"samples_ns\": [ 25951753030, 25952227107, 25951221909 ],\n \"samples_ts\": [ 19.7289, 19.7286, 19.7293 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:13:43Z\",\n \"avg_ns\": 12227357224,\n \"stddev_ns\": 1386971,\n \"avg_ts\": 10.468329,\n \"stddev_ts\": 0.001184,\n \"samples_ns\": [ 12228826863, 12227162390, 12226082420 ],\n \"samples_ts\": [ 10.4671, 10.4685, 10.4694 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T13:11:59Z", + "avg_ns": 25951734015, + "stddev_ns": 528042, + "avg_ts": 19.728932, + "stddev_ts": 0.000382, + "samples_ns": [ + 25951753030, + 25952227107, + 25951221909 + ], + "samples_ts": [ + 19.7289, + 19.7286, + 19.7293 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T13:13:43Z", + "avg_ns": 12227357224, + "stddev_ns": 1386971, + "avg_ts": 10.468329, + "stddev_ts": 0.001184, + "samples_ns": [ + 12228826863, + 12227162390, + 12226082420 + ], + "samples_ts": [ + 10.4671, + 10.4685, + 10.4694 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 754 + }, + { + "timestamp_utc": "2025-12-09T13:18:33.356537+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:14:21Z\",\n \"avg_ns\": 25994028362,\n \"stddev_ns\": 77548380,\n \"avg_ts\": 19.696948,\n \"stddev_ts\": 0.058661,\n \"samples_ns\": [ 26083557776, 25950699783, 25947827529 ],\n \"samples_ts\": [ 19.6292, 19.7297, 19.7319 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:16:05Z\",\n \"avg_ns\": 49139985255,\n \"stddev_ns\": 2429555,\n \"avg_ts\": 10.419214,\n \"stddev_ts\": 0.000511,\n \"samples_ns\": [ 49141172335, 49141570613, 49137212819 ],\n \"samples_ts\": [ 10.419, 10.4189, 10.4198 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T13:14:21Z", + "avg_ns": 25994028362, + "stddev_ns": 77548380, + "avg_ts": 19.696948, + "stddev_ts": 0.058661, + "samples_ns": [ + 26083557776, + 25950699783, + 25947827529 + ], + "samples_ts": [ + 19.6292, + 19.7297, + 19.7319 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T13:16:05Z", + "avg_ns": 49139985255, + "stddev_ns": 2429555, + "avg_ts": 10.419214, + "stddev_ts": 0.000511, + "samples_ns": [ + 49141172335, + 49141570613, + 49137212819 + ], + "samples_ts": [ + 10.419, + 10.4189, + 10.4198 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 755 + }, + { + "timestamp_utc": "2025-12-09T13:19:12.308207+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:18:34Z\",\n \"avg_ns\": 3204916757,\n \"stddev_ns\": 2125152,\n \"avg_ts\": 39.938647,\n \"stddev_ts\": 0.026463,\n \"samples_ns\": [ 3203632545, 3203748845, 3207368882 ],\n \"samples_ts\": [ 39.9546, 39.9532, 39.9081 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:18:47Z\",\n \"avg_ns\": 8336656310,\n \"stddev_ns\": 762384,\n \"avg_ts\": 15.353878,\n \"stddev_ts\": 0.001404,\n \"samples_ns\": [ 8336279459, 8336155738, 8337533733 ],\n \"samples_ts\": [ 15.3546, 15.3548, 15.3523 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T13:18:34Z", + "avg_ns": 3204916757, + "stddev_ns": 2125152, + "avg_ts": 39.938647, + "stddev_ts": 0.026463, + "samples_ns": [ + 3203632545, + 3203748845, + 3207368882 + ], + "samples_ts": [ + 39.9546, + 39.9532, + 39.9081 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T13:18:47Z", + "avg_ns": 8336656310, + "stddev_ns": 762384, + "avg_ts": 15.353878, + "stddev_ts": 0.001404, + "samples_ns": [ + 8336279459, + 8336155738, + 8337533733 + ], + "samples_ts": [ + 15.3546, + 15.3548, + 15.3523 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 756 + }, + { + "timestamp_utc": "2025-12-09T13:21:07.300403+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:19:13Z\",\n \"avg_ns\": 3206116187,\n \"stddev_ns\": 3883829,\n \"avg_ts\": 39.923733,\n \"stddev_ts\": 0.048319,\n \"samples_ns\": [ 3210597035, 3203737104, 3204014424 ],\n \"samples_ts\": [ 39.868, 39.9533, 39.9499 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:19:26Z\",\n \"avg_ns\": 33667636362,\n \"stddev_ns\": 7901595,\n \"avg_ts\": 15.207483,\n \"stddev_ts\": 0.003568,\n \"samples_ns\": [ 33661866390, 33676639527, 33664403170 ],\n \"samples_ts\": [ 15.2101, 15.2034, 15.2089 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T13:19:13Z", + "avg_ns": 3206116187, + "stddev_ns": 3883829, + "avg_ts": 39.923733, + "stddev_ts": 0.048319, + "samples_ns": [ + 3210597035, + 3203737104, + 3204014424 + ], + "samples_ts": [ + 39.868, + 39.9533, + 39.9499 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T13:19:26Z", + "avg_ns": 33667636362, + "stddev_ns": 7901595, + "avg_ts": 15.207483, + "stddev_ts": 0.003568, + "samples_ns": [ + 33661866390, + 33676639527, + 33664403170 + ], + "samples_ts": [ + 15.2101, + 15.2034, + 15.2089 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 757 + }, + { + "timestamp_utc": "2025-12-09T13:22:25.210031+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:21:08Z\",\n \"avg_ns\": 12909439466,\n \"stddev_ns\": 2770405,\n \"avg_ts\": 39.660902,\n \"stddev_ts\": 0.008496,\n \"samples_ns\": [ 12912598306, 12907453072, 12908267022 ],\n \"samples_ts\": [ 39.6512, 39.667, 39.6645 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:22:00Z\",\n \"avg_ns\": 8344072278,\n \"stddev_ns\": 3610725,\n \"avg_ts\": 15.340233,\n \"stddev_ts\": 0.006637,\n \"samples_ns\": [ 8348180395, 8341401882, 8342634557 ],\n \"samples_ts\": [ 15.3327, 15.3451, 15.3429 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T13:21:08Z", + "avg_ns": 12909439466, + "stddev_ns": 2770405, + "avg_ts": 39.660902, + "stddev_ts": 0.008496, + "samples_ns": [ + 12912598306, + 12907453072, + 12908267022 + ], + "samples_ts": [ + 39.6512, + 39.667, + 39.6645 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T13:22:00Z", + "avg_ns": 8344072278, + "stddev_ns": 3610725, + "avg_ts": 15.340233, + "stddev_ts": 0.006637, + "samples_ns": [ + 8348180395, + 8341401882, + 8342634557 + ], + "samples_ts": [ + 15.3327, + 15.3451, + 15.3429 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 758 + }, + { + "timestamp_utc": "2025-12-09T13:25:00.042891+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:22:26Z\",\n \"avg_ns\": 12978802977,\n \"stddev_ns\": 99446918,\n \"avg_ts\": 39.450476,\n \"stddev_ts\": 0.301051,\n \"samples_ns\": [ 13092641514, 12934931863, 12908835556 ],\n \"samples_ts\": [ 39.1059, 39.5827, 39.6628 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:23:18Z\",\n \"avg_ns\": 33889997692,\n \"stddev_ns\": 7512406,\n \"avg_ts\": 15.107703,\n \"stddev_ts\": 0.003348,\n \"samples_ns\": [ 33884325484, 33887152960, 33898514633 ],\n \"samples_ts\": [ 15.1102, 15.109, 15.1039 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T13:22:26Z", + "avg_ns": 12978802977, + "stddev_ns": 99446918, + "avg_ts": 39.450476, + "stddev_ts": 0.301051, + "samples_ns": [ + 13092641514, + 12934931863, + 12908835556 + ], + "samples_ts": [ + 39.1059, + 39.5827, + 39.6628 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T13:23:18Z", + "avg_ns": 33889997692, + "stddev_ns": 7512406, + "avg_ts": 15.107703, + "stddev_ts": 0.003348, + "samples_ns": [ + 33884325484, + 33887152960, + 33898514633 + ], + "samples_ts": [ + 15.1102, + 15.109, + 15.1039 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 759 + }, + { + "timestamp_utc": "2025-12-09T13:25:39.167302+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:25:01Z\",\n \"avg_ns\": 3203975259,\n \"stddev_ns\": 254247,\n \"avg_ts\": 39.950371,\n \"stddev_ts\": 0.003009,\n \"samples_ns\": [ 3204250131, 3203798221, 3203877427 ],\n \"samples_ts\": [ 39.9469, 39.9526, 39.9516 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:25:13Z\",\n \"avg_ns\": 8394300693,\n \"stddev_ns\": 5424963,\n \"avg_ts\": 15.248445,\n \"stddev_ts\": 0.009848,\n \"samples_ns\": [ 8400541532, 8390730395, 8391630154 ],\n \"samples_ts\": [ 15.2371, 15.2549, 15.2533 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T13:25:01Z", + "avg_ns": 3203975259, + "stddev_ns": 254247, + "avg_ts": 39.950371, + "stddev_ts": 0.003009, + "samples_ns": [ + 3204250131, + 3203798221, + 3203877427 + ], + "samples_ts": [ + 39.9469, + 39.9526, + 39.9516 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T13:25:13Z", + "avg_ns": 8394300693, + "stddev_ns": 5424963, + "avg_ts": 15.248445, + "stddev_ts": 0.009848, + "samples_ns": [ + 8400541532, + 8390730395, + 8391630154 + ], + "samples_ts": [ + 15.2371, + 15.2549, + 15.2533 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 760 + }, + { + "timestamp_utc": "2025-12-09T13:27:34.020836+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:25:40Z\",\n \"avg_ns\": 3203439159,\n \"stddev_ns\": 937563,\n \"avg_ts\": 39.957059,\n \"stddev_ts\": 0.011650,\n \"samples_ns\": [ 3202725448, 3203095601, 3204496430 ],\n \"samples_ts\": [ 39.966, 39.9613, 39.9439 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:25:52Z\",\n \"avg_ns\": 33628625587,\n \"stddev_ns\": 50556956,\n \"avg_ts\": 15.225147,\n \"stddev_ts\": 0.022869,\n \"samples_ns\": [ 33686972956, 33601072713, 33597831094 ],\n \"samples_ts\": [ 15.1988, 15.2376, 15.2391 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T13:25:40Z", + "avg_ns": 3203439159, + "stddev_ns": 937563, + "avg_ts": 39.957059, + "stddev_ts": 0.01165, + "samples_ns": [ + 3202725448, + 3203095601, + 3204496430 + ], + "samples_ts": [ + 39.966, + 39.9613, + 39.9439 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T13:25:52Z", + "avg_ns": 33628625587, + "stddev_ns": 50556956, + "avg_ts": 15.225147, + "stddev_ts": 0.022869, + "samples_ns": [ + 33686972956, + 33601072713, + 33597831094 + ], + "samples_ts": [ + 15.1988, + 15.2376, + 15.2391 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 761 + }, + { + "timestamp_utc": "2025-12-09T13:28:51.939493+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:27:35Z\",\n \"avg_ns\": 12945774651,\n \"stddev_ns\": 12817736,\n \"avg_ts\": 39.549610,\n \"stddev_ts\": 0.039134,\n \"samples_ns\": [ 12939402236, 12960528604, 12937393115 ],\n \"samples_ts\": [ 39.5691, 39.5046, 39.5752 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:28:26Z\",\n \"avg_ns\": 8334541529,\n \"stddev_ns\": 2172631,\n \"avg_ts\": 15.357774,\n \"stddev_ts\": 0.003996,\n \"samples_ns\": [ 8332869224, 8336992057, 8333763308 ],\n \"samples_ts\": [ 15.3609, 15.3533, 15.3592 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T13:27:35Z", + "avg_ns": 12945774651, + "stddev_ns": 12817736, + "avg_ts": 39.54961, + "stddev_ts": 0.039134, + "samples_ns": [ + 12939402236, + 12960528604, + 12937393115 + ], + "samples_ts": [ + 39.5691, + 39.5046, + 39.5752 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T13:28:26Z", + "avg_ns": 8334541529, + "stddev_ns": 2172631, + "avg_ts": 15.357774, + "stddev_ts": 0.003996, + "samples_ns": [ + 8332869224, + 8336992057, + 8333763308 + ], + "samples_ts": [ + 15.3609, + 15.3533, + 15.3592 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 762 + }, + { + "timestamp_utc": "2025-12-09T13:31:25.744062+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:28:52Z\",\n \"avg_ns\": 12937467763,\n \"stddev_ns\": 4227011,\n \"avg_ts\": 39.574981,\n \"stddev_ts\": 0.012923,\n \"samples_ns\": [ 12942309752, 12935567392, 12934526146 ],\n \"samples_ts\": [ 39.5602, 39.5808, 39.584 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:29:44Z\",\n \"avg_ns\": 33642343593,\n \"stddev_ns\": 4123929,\n \"avg_ts\": 15.218916,\n \"stddev_ts\": 0.001862,\n \"samples_ns\": [ 33641702541, 33638585971, 33646742269 ],\n \"samples_ts\": [ 15.2192, 15.2206, 15.2169 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T13:28:52Z", + "avg_ns": 12937467763, + "stddev_ns": 4227011, + "avg_ts": 39.574981, + "stddev_ts": 0.012923, + "samples_ns": [ + 12942309752, + 12935567392, + 12934526146 + ], + "samples_ts": [ + 39.5602, + 39.5808, + 39.584 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T13:29:44Z", + "avg_ns": 33642343593, + "stddev_ns": 4123929, + "avg_ts": 15.218916, + "stddev_ts": 0.001862, + "samples_ns": [ + 33641702541, + 33638585971, + 33646742269 + ], + "samples_ts": [ + 15.2192, + 15.2206, + 15.2169 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 763 + }, + { + "timestamp_utc": "2025-12-09T13:32:04.628682+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:31:26Z\",\n \"avg_ns\": 3202415619,\n \"stddev_ns\": 439808,\n \"avg_ts\": 39.969828,\n \"stddev_ts\": 0.005397,\n \"samples_ns\": [ 3202913318, 3202202106, 3202131435 ],\n \"samples_ts\": [ 39.9636, 39.9725, 39.9734 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:31:39Z\",\n \"avg_ns\": 8316470552,\n \"stddev_ns\": 5766830,\n \"avg_ts\": 15.391149,\n \"stddev_ts\": 0.010668,\n \"samples_ns\": [ 8315262120, 8322744378, 8311405160 ],\n \"samples_ts\": [ 15.3934, 15.3795, 15.4005 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T13:31:26Z", + "avg_ns": 3202415619, + "stddev_ns": 439808, + "avg_ts": 39.969828, + "stddev_ts": 0.005397, + "samples_ns": [ + 3202913318, + 3202202106, + 3202131435 + ], + "samples_ts": [ + 39.9636, + 39.9725, + 39.9734 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T13:31:39Z", + "avg_ns": 8316470552, + "stddev_ns": 5766830, + "avg_ts": 15.391149, + "stddev_ts": 0.010668, + "samples_ns": [ + 8315262120, + 8322744378, + 8311405160 + ], + "samples_ts": [ + 15.3934, + 15.3795, + 15.4005 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 764 + }, + { + "timestamp_utc": "2025-12-09T13:33:59.921814+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:32:05Z\",\n \"avg_ns\": 3202577157,\n \"stddev_ns\": 312587,\n \"avg_ts\": 39.967812,\n \"stddev_ts\": 0.003771,\n \"samples_ns\": [ 3202551905, 3202891160, 3202288408 ],\n \"samples_ts\": [ 39.9681, 39.9639, 39.9714 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:32:18Z\",\n \"avg_ns\": 33785009801,\n \"stddev_ns\": 112950950,\n \"avg_ts\": 15.154763,\n \"stddev_ts\": 0.050568,\n \"samples_ns\": [ 33721428761, 33915420493, 33718180151 ],\n \"samples_ts\": [ 15.1832, 15.0964, 15.1847 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T13:32:05Z", + "avg_ns": 3202577157, + "stddev_ns": 312587, + "avg_ts": 39.967812, + "stddev_ts": 0.003771, + "samples_ns": [ + 3202551905, + 3202891160, + 3202288408 + ], + "samples_ts": [ + 39.9681, + 39.9639, + 39.9714 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T13:32:18Z", + "avg_ns": 33785009801, + "stddev_ns": 112950950, + "avg_ts": 15.154763, + "stddev_ts": 0.050568, + "samples_ns": [ + 33721428761, + 33915420493, + 33718180151 + ], + "samples_ts": [ + 15.1832, + 15.0964, + 15.1847 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 765 + }, + { + "timestamp_utc": "2025-12-09T13:35:18.784214+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:34:00Z\",\n \"avg_ns\": 13141816258,\n \"stddev_ns\": 217001,\n \"avg_ts\": 38.959607,\n \"stddev_ts\": 0.000643,\n \"samples_ns\": [ 13141951334, 13141931492, 13141565948 ],\n \"samples_ts\": [ 38.9592, 38.9593, 38.9603 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:34:53Z\",\n \"avg_ns\": 8378079757,\n \"stddev_ns\": 2334885,\n \"avg_ts\": 15.277965,\n \"stddev_ts\": 0.004258,\n \"samples_ns\": [ 8378851033, 8379931426, 8375456812 ],\n \"samples_ts\": [ 15.2766, 15.2746, 15.2827 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T13:34:00Z", + "avg_ns": 13141816258, + "stddev_ns": 217001, + "avg_ts": 38.959607, + "stddev_ts": 0.000643, + "samples_ns": [ + 13141951334, + 13141931492, + 13141565948 + ], + "samples_ts": [ + 38.9592, + 38.9593, + 38.9603 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T13:34:53Z", + "avg_ns": 8378079757, + "stddev_ns": 2334885, + "avg_ts": 15.277965, + "stddev_ts": 0.004258, + "samples_ns": [ + 8378851033, + 8379931426, + 8375456812 + ], + "samples_ts": [ + 15.2766, + 15.2746, + 15.2827 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 766 + }, + { + "timestamp_utc": "2025-12-09T13:37:54.275945+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:35:19Z\",\n \"avg_ns\": 13145441266,\n \"stddev_ns\": 1179926,\n \"avg_ts\": 38.948864,\n \"stddev_ts\": 0.003463,\n \"samples_ns\": [ 13146759333, 13144531314, 13145033153 ],\n \"samples_ts\": [ 38.945, 38.9516, 38.9501 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:36:12Z\",\n \"avg_ns\": 33925458567,\n \"stddev_ns\": 3841578,\n \"avg_ts\": 15.091911,\n \"stddev_ts\": 0.001707,\n \"samples_ns\": [ 33921246729, 33926373221, 33928755752 ],\n \"samples_ts\": [ 15.0938, 15.0915, 15.0904 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T13:35:19Z", + "avg_ns": 13145441266, + "stddev_ns": 1179926, + "avg_ts": 38.948864, + "stddev_ts": 0.003463, + "samples_ns": [ + 13146759333, + 13144531314, + 13145033153 + ], + "samples_ts": [ + 38.945, + 38.9516, + 38.9501 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T13:36:12Z", + "avg_ns": 33925458567, + "stddev_ns": 3841578, + "avg_ts": 15.091911, + "stddev_ts": 0.001707, + "samples_ns": [ + 33921246729, + 33926373221, + 33928755752 + ], + "samples_ts": [ + 15.0938, + 15.0915, + 15.0904 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 767 + }, + { + "timestamp_utc": "2025-12-09T13:38:33.420530+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:37:55Z\",\n \"avg_ns\": 3201767600,\n \"stddev_ns\": 191655,\n \"avg_ts\": 39.977917,\n \"stddev_ts\": 0.002286,\n \"samples_ns\": [ 3201657717, 3201978985, 3201666099 ],\n \"samples_ts\": [ 39.9793, 39.9753, 39.9792 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:38:08Z\",\n \"avg_ns\": 8405159648,\n \"stddev_ns\": 6679693,\n \"avg_ts\": 15.228747,\n \"stddev_ts\": 0.012095,\n \"samples_ns\": [ 8400289315, 8402416845, 8412772786 ],\n \"samples_ts\": [ 15.2376, 15.2337, 15.215 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T13:37:55Z", + "avg_ns": 3201767600, + "stddev_ns": 191655, + "avg_ts": 39.977917, + "stddev_ts": 0.002286, + "samples_ns": [ + 3201657717, + 3201978985, + 3201666099 + ], + "samples_ts": [ + 39.9793, + 39.9753, + 39.9792 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T13:38:08Z", + "avg_ns": 8405159648, + "stddev_ns": 6679693, + "avg_ts": 15.228747, + "stddev_ts": 0.012095, + "samples_ns": [ + 8400289315, + 8402416845, + 8412772786 + ], + "samples_ts": [ + 15.2376, + 15.2337, + 15.215 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 768 + }, + { + "timestamp_utc": "2025-12-09T13:40:28.955996+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:38:34Z\",\n \"avg_ns\": 3202027689,\n \"stddev_ns\": 196681,\n \"avg_ts\": 39.974670,\n \"stddev_ts\": 0.002243,\n \"samples_ns\": [ 3202030988, 3201846398, 3202205683 ],\n \"samples_ts\": [ 39.9746, 39.9769, 39.9724 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:38:47Z\",\n \"avg_ns\": 33851291252,\n \"stddev_ns\": 122163940,\n \"avg_ts\": 15.125108,\n \"stddev_ts\": 0.054674,\n \"samples_ns\": [ 33893554307, 33946711731, 33713607719 ],\n \"samples_ts\": [ 15.1061, 15.0825, 15.1867 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T13:38:34Z", + "avg_ns": 3202027689, + "stddev_ns": 196681, + "avg_ts": 39.97467, + "stddev_ts": 0.002243, + "samples_ns": [ + 3202030988, + 3201846398, + 3202205683 + ], + "samples_ts": [ + 39.9746, + 39.9769, + 39.9724 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T13:38:47Z", + "avg_ns": 33851291252, + "stddev_ns": 122163940, + "avg_ts": 15.125108, + "stddev_ts": 0.054674, + "samples_ns": [ + 33893554307, + 33946711731, + 33713607719 + ], + "samples_ts": [ + 15.1061, + 15.0825, + 15.1867 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 769 + }, + { + "timestamp_utc": "2025-12-09T13:41:46.888475+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:40:29Z\",\n \"avg_ns\": 12908304023,\n \"stddev_ns\": 688403,\n \"avg_ts\": 39.664390,\n \"stddev_ts\": 0.002115,\n \"samples_ns\": [ 12908217102, 12909031759, 12907663208 ],\n \"samples_ts\": [ 39.6647, 39.6622, 39.6664 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:41:21Z\",\n \"avg_ns\": 8388277285,\n \"stddev_ns\": 1886228,\n \"avg_ts\": 15.259391,\n \"stddev_ts\": 0.003431,\n \"samples_ns\": [ 8386667865, 8387811097, 8390352893 ],\n \"samples_ts\": [ 15.2623, 15.2602, 15.2556 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T13:40:29Z", + "avg_ns": 12908304023, + "stddev_ns": 688403, + "avg_ts": 39.66439, + "stddev_ts": 0.002115, + "samples_ns": [ + 12908217102, + 12909031759, + 12907663208 + ], + "samples_ts": [ + 39.6647, + 39.6622, + 39.6664 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T13:41:21Z", + "avg_ns": 8388277285, + "stddev_ns": 1886228, + "avg_ts": 15.259391, + "stddev_ts": 0.003431, + "samples_ns": [ + 8386667865, + 8387811097, + 8390352893 + ], + "samples_ts": [ + 15.2623, + 15.2602, + 15.2556 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 770 + }, + { + "timestamp_utc": "2025-12-09T13:44:20.765755+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:41:47Z\",\n \"avg_ns\": 12913534970,\n \"stddev_ns\": 2108460,\n \"avg_ts\": 39.648323,\n \"stddev_ts\": 0.006473,\n \"samples_ns\": [ 12911664999, 12915820145, 12913119766 ],\n \"samples_ts\": [ 39.6541, 39.6413, 39.6496 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:42:39Z\",\n \"avg_ns\": 33698147367,\n \"stddev_ns\": 2955017,\n \"avg_ts\": 15.193714,\n \"stddev_ts\": 0.001332,\n \"samples_ns\": [ 33701159397, 33695252869, 33698029835 ],\n \"samples_ts\": [ 15.1924, 15.195, 15.1938 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T13:41:47Z", + "avg_ns": 12913534970, + "stddev_ns": 2108460, + "avg_ts": 39.648323, + "stddev_ts": 0.006473, + "samples_ns": [ + 12911664999, + 12915820145, + 12913119766 + ], + "samples_ts": [ + 39.6541, + 39.6413, + 39.6496 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T13:42:39Z", + "avg_ns": 33698147367, + "stddev_ns": 2955017, + "avg_ts": 15.193714, + "stddev_ts": 0.001332, + "samples_ns": [ + 33701159397, + 33695252869, + 33698029835 + ], + "samples_ts": [ + 15.1924, + 15.195, + 15.1938 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 771 + }, + { + "timestamp_utc": "2025-12-09T13:44:59.757028+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:44:21Z\",\n \"avg_ns\": 3204865293,\n \"stddev_ns\": 2647978,\n \"avg_ts\": 39.939294,\n \"stddev_ts\": 0.032976,\n \"samples_ns\": [ 3207921683, 3203287657, 3203386540 ],\n \"samples_ts\": [ 39.9012, 39.9589, 39.9577 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:44:34Z\",\n \"avg_ns\": 8348528903,\n \"stddev_ns\": 1065755,\n \"avg_ts\": 15.332043,\n \"stddev_ts\": 0.001957,\n \"samples_ns\": [ 8347305022, 8349252298, 8349029389 ],\n \"samples_ts\": [ 15.3343, 15.3307, 15.3311 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T13:44:21Z", + "avg_ns": 3204865293, + "stddev_ns": 2647978, + "avg_ts": 39.939294, + "stddev_ts": 0.032976, + "samples_ns": [ + 3207921683, + 3203287657, + 3203386540 + ], + "samples_ts": [ + 39.9012, + 39.9589, + 39.9577 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T13:44:34Z", + "avg_ns": 8348528903, + "stddev_ns": 1065755, + "avg_ts": 15.332043, + "stddev_ts": 0.001957, + "samples_ns": [ + 8347305022, + 8349252298, + 8349029389 + ], + "samples_ts": [ + 15.3343, + 15.3307, + 15.3311 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 772 + }, + { + "timestamp_utc": "2025-12-09T13:46:55.262897+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:45:00Z\",\n \"avg_ns\": 3203822616,\n \"stddev_ns\": 477633,\n \"avg_ts\": 39.952275,\n \"stddev_ts\": 0.005956,\n \"samples_ns\": [ 3204371122, 3203598256, 3203498470 ],\n \"samples_ts\": [ 39.9454, 39.9551, 39.9563 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:45:13Z\",\n \"avg_ns\": 33855015564,\n \"stddev_ns\": 2231671,\n \"avg_ts\": 15.123313,\n \"stddev_ts\": 0.000990,\n \"samples_ns\": [ 33855587019, 33852569345, 33856890330 ],\n \"samples_ts\": [ 15.1231, 15.1244, 15.1225 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T13:45:00Z", + "avg_ns": 3203822616, + "stddev_ns": 477633, + "avg_ts": 39.952275, + "stddev_ts": 0.005956, + "samples_ns": [ + 3204371122, + 3203598256, + 3203498470 + ], + "samples_ts": [ + 39.9454, + 39.9551, + 39.9563 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T13:45:13Z", + "avg_ns": 33855015564, + "stddev_ns": 2231671, + "avg_ts": 15.123313, + "stddev_ts": 0.00099, + "samples_ns": [ + 33855587019, + 33852569345, + 33856890330 + ], + "samples_ts": [ + 15.1231, + 15.1244, + 15.1225 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 773 + }, + { + "timestamp_utc": "2025-12-09T13:48:13.174186+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:46:56Z\",\n \"avg_ns\": 12935793867,\n \"stddev_ns\": 1851972,\n \"avg_ts\": 39.580099,\n \"stddev_ts\": 0.005645,\n \"samples_ns\": [ 12937922081, 12934646316, 12934813206 ],\n \"samples_ts\": [ 39.5736, 39.5836, 39.5831 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:47:47Z\",\n \"avg_ns\": 8342371262,\n \"stddev_ns\": 2416197,\n \"avg_ts\": 15.343360,\n \"stddev_ts\": 0.004440,\n \"samples_ns\": [ 8345156154, 8341092616, 8340865017 ],\n \"samples_ts\": [ 15.3382, 15.3457, 15.3461 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T13:46:56Z", + "avg_ns": 12935793867, + "stddev_ns": 1851972, + "avg_ts": 39.580099, + "stddev_ts": 0.005645, + "samples_ns": [ + 12937922081, + 12934646316, + 12934813206 + ], + "samples_ts": [ + 39.5736, + 39.5836, + 39.5831 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T13:47:47Z", + "avg_ns": 8342371262, + "stddev_ns": 2416197, + "avg_ts": 15.34336, + "stddev_ts": 0.00444, + "samples_ns": [ + 8345156154, + 8341092616, + 8340865017 + ], + "samples_ts": [ + 15.3382, + 15.3457, + 15.3461 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 774 + }, + { + "timestamp_utc": "2025-12-09T13:50:47.303204+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:48:14Z\",\n \"avg_ns\": 12930581882,\n \"stddev_ns\": 323612,\n \"avg_ts\": 39.596053,\n \"stddev_ts\": 0.000928,\n \"samples_ns\": [ 12930251007, 12930648903, 12930845737 ],\n \"samples_ts\": [ 39.5971, 39.5958, 39.5952 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:49:05Z\",\n \"avg_ns\": 33752334050,\n \"stddev_ns\": 4534926,\n \"avg_ts\": 15.169321,\n \"stddev_ts\": 0.002035,\n \"samples_ns\": [ 33751253558, 33757304021, 33748444573 ],\n \"samples_ts\": [ 15.1698, 15.1671, 15.1711 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T13:48:14Z", + "avg_ns": 12930581882, + "stddev_ns": 323612, + "avg_ts": 39.596053, + "stddev_ts": 0.000928, + "samples_ns": [ + 12930251007, + 12930648903, + 12930845737 + ], + "samples_ts": [ + 39.5971, + 39.5958, + 39.5952 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T13:49:05Z", + "avg_ns": 33752334050, + "stddev_ns": 4534926, + "avg_ts": 15.169321, + "stddev_ts": 0.002035, + "samples_ns": [ + 33751253558, + 33757304021, + 33748444573 + ], + "samples_ts": [ + 15.1698, + 15.1671, + 15.1711 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 775 + }, + { + "timestamp_utc": "2025-12-09T13:51:26.253299+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:50:48Z\",\n \"avg_ns\": 3205667375,\n \"stddev_ns\": 2970998,\n \"avg_ts\": 39.929306,\n \"stddev_ts\": 0.036980,\n \"samples_ns\": [ 3203830675, 3204077035, 3209094416 ],\n \"samples_ts\": [ 39.9522, 39.9491, 39.8866 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:51:01Z\",\n \"avg_ns\": 8329877655,\n \"stddev_ns\": 1303285,\n \"avg_ts\": 15.366372,\n \"stddev_ts\": 0.002398,\n \"samples_ns\": [ 8331298021, 8329588357, 8328746588 ],\n \"samples_ts\": [ 15.3638, 15.3669, 15.3685 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T13:50:48Z", + "avg_ns": 3205667375, + "stddev_ns": 2970998, + "avg_ts": 39.929306, + "stddev_ts": 0.03698, + "samples_ns": [ + 3203830675, + 3204077035, + 3209094416 + ], + "samples_ts": [ + 39.9522, + 39.9491, + 39.8866 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T13:51:01Z", + "avg_ns": 8329877655, + "stddev_ns": 1303285, + "avg_ts": 15.366372, + "stddev_ts": 0.002398, + "samples_ns": [ + 8331298021, + 8329588357, + 8328746588 + ], + "samples_ts": [ + 15.3638, + 15.3669, + 15.3685 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 776 + }, + { + "timestamp_utc": "2025-12-09T13:53:22.261922+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:51:27Z\",\n \"avg_ns\": 3205710592,\n \"stddev_ns\": 4046042,\n \"avg_ts\": 39.928787,\n \"stddev_ts\": 0.050349,\n \"samples_ns\": [ 3210381109, 3203314035, 3203436634 ],\n \"samples_ts\": [ 39.8707, 39.9586, 39.9571 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:51:40Z\",\n \"avg_ns\": 34010607226,\n \"stddev_ns\": 13433602,\n \"avg_ts\": 15.054129,\n \"stddev_ts\": 0.005946,\n \"samples_ns\": [ 34024235433, 33997377076, 34010209169 ],\n \"samples_ts\": [ 15.0481, 15.06, 15.0543 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T13:51:27Z", + "avg_ns": 3205710592, + "stddev_ns": 4046042, + "avg_ts": 39.928787, + "stddev_ts": 0.050349, + "samples_ns": [ + 3210381109, + 3203314035, + 3203436634 + ], + "samples_ts": [ + 39.8707, + 39.9586, + 39.9571 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T13:51:40Z", + "avg_ns": 34010607226, + "stddev_ns": 13433602, + "avg_ts": 15.054129, + "stddev_ts": 0.005946, + "samples_ns": [ + 34024235433, + 33997377076, + 34010209169 + ], + "samples_ts": [ + 15.0481, + 15.06, + 15.0543 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 777 + }, + { + "timestamp_utc": "2025-12-09T13:54:40.966797+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:53:23Z\",\n \"avg_ns\": 13146391910,\n \"stddev_ns\": 1451842,\n \"avg_ts\": 38.946048,\n \"stddev_ts\": 0.004301,\n \"samples_ns\": [ 13148040830, 13145829449, 13145305451 ],\n \"samples_ts\": [ 38.9412, 38.9477, 38.9493 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:54:15Z\",\n \"avg_ns\": 8320969203,\n \"stddev_ns\": 765648,\n \"avg_ts\": 15.382824,\n \"stddev_ts\": 0.001416,\n \"samples_ns\": [ 8321424942, 8320085251, 8321397416 ],\n \"samples_ts\": [ 15.382, 15.3845, 15.382 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T13:53:23Z", + "avg_ns": 13146391910, + "stddev_ns": 1451842, + "avg_ts": 38.946048, + "stddev_ts": 0.004301, + "samples_ns": [ + 13148040830, + 13145829449, + 13145305451 + ], + "samples_ts": [ + 38.9412, + 38.9477, + 38.9493 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T13:54:15Z", + "avg_ns": 8320969203, + "stddev_ns": 765648, + "avg_ts": 15.382824, + "stddev_ts": 0.001416, + "samples_ns": [ + 8321424942, + 8320085251, + 8321397416 + ], + "samples_ts": [ + 15.382, + 15.3845, + 15.382 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 778 + }, + { + "timestamp_utc": "2025-12-09T13:57:17.292286+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:54:42Z\",\n \"avg_ns\": 13329747887,\n \"stddev_ns\": 1774446,\n \"avg_ts\": 38.410329,\n \"stddev_ts\": 0.005102,\n \"samples_ns\": [ 13328656859, 13331790924, 13328795879 ],\n \"samples_ts\": [ 38.4135, 38.4044, 38.4131 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:55:35Z\",\n \"avg_ns\": 33927118317,\n \"stddev_ns\": 7548546,\n \"avg_ts\": 15.091173,\n \"stddev_ts\": 0.003357,\n \"samples_ns\": [ 33935755646, 33923813537, 33921785768 ],\n \"samples_ts\": [ 15.0873, 15.0926, 15.0935 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T13:54:42Z", + "avg_ns": 13329747887, + "stddev_ns": 1774446, + "avg_ts": 38.410329, + "stddev_ts": 0.005102, + "samples_ns": [ + 13328656859, + 13331790924, + 13328795879 + ], + "samples_ts": [ + 38.4135, + 38.4044, + 38.4131 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T13:55:35Z", + "avg_ns": 33927118317, + "stddev_ns": 7548546, + "avg_ts": 15.091173, + "stddev_ts": 0.003357, + "samples_ns": [ + 33935755646, + 33923813537, + 33921785768 + ], + "samples_ts": [ + 15.0873, + 15.0926, + 15.0935 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 779 + }, + { + "timestamp_utc": "2025-12-09T13:57:56.251790+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:57:18Z\",\n \"avg_ns\": 3203345595,\n \"stddev_ns\": 188729,\n \"avg_ts\": 39.958224,\n \"stddev_ts\": 0.002354,\n \"samples_ns\": [ 3203242036, 3203231316, 3203563433 ],\n \"samples_ts\": [ 39.9595, 39.9596, 39.9555 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:57:31Z\",\n \"avg_ns\": 8335573959,\n \"stddev_ns\": 7647930,\n \"avg_ts\": 15.355880,\n \"stddev_ts\": 0.014081,\n \"samples_ns\": [ 8344404112, 8331220644, 8331097122 ],\n \"samples_ts\": [ 15.3396, 15.3639, 15.3641 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T13:57:18Z", + "avg_ns": 3203345595, + "stddev_ns": 188729, + "avg_ts": 39.958224, + "stddev_ts": 0.002354, + "samples_ns": [ + 3203242036, + 3203231316, + 3203563433 + ], + "samples_ts": [ + 39.9595, + 39.9596, + 39.9555 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T13:57:31Z", + "avg_ns": 8335573959, + "stddev_ns": 7647930, + "avg_ts": 15.35588, + "stddev_ts": 0.014081, + "samples_ns": [ + 8344404112, + 8331220644, + 8331097122 + ], + "samples_ts": [ + 15.3396, + 15.3639, + 15.3641 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 780 + }, + { + "timestamp_utc": "2025-12-09T13:59:51.099838+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:57:57Z\",\n \"avg_ns\": 3205980486,\n \"stddev_ns\": 2444837,\n \"avg_ts\": 39.925399,\n \"stddev_ts\": 0.030425,\n \"samples_ns\": [ 3204572725, 3208802782, 3204565952 ],\n \"samples_ts\": [ 39.9429, 39.8903, 39.943 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:58:10Z\",\n \"avg_ns\": 33628318517,\n \"stddev_ns\": 6131019,\n \"avg_ts\": 15.225264,\n \"stddev_ts\": 0.002775,\n \"samples_ns\": [ 33634486203, 33628238923, 33622230426 ],\n \"samples_ts\": [ 15.2225, 15.2253, 15.228 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T13:57:57Z", + "avg_ns": 3205980486, + "stddev_ns": 2444837, + "avg_ts": 39.925399, + "stddev_ts": 0.030425, + "samples_ns": [ + 3204572725, + 3208802782, + 3204565952 + ], + "samples_ts": [ + 39.9429, + 39.8903, + 39.943 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T13:58:10Z", + "avg_ns": 33628318517, + "stddev_ns": 6131019, + "avg_ts": 15.225264, + "stddev_ts": 0.002775, + "samples_ns": [ + 33634486203, + 33628238923, + 33622230426 + ], + "samples_ts": [ + 15.2225, + 15.2253, + 15.228 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 781 + }, + { + "timestamp_utc": "2025-12-09T14:01:09.103111+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T13:59:52Z\",\n \"avg_ns\": 12906931517,\n \"stddev_ns\": 2313595,\n \"avg_ts\": 39.668608,\n \"stddev_ts\": 0.007110,\n \"samples_ns\": [ 12909602999, 12905606419, 12905585133 ],\n \"samples_ts\": [ 39.6604, 39.6727, 39.6727 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:00:43Z\",\n \"avg_ns\": 8410714302,\n \"stddev_ns\": 3572149,\n \"avg_ts\": 15.218685,\n \"stddev_ts\": 0.006460,\n \"samples_ns\": [ 8414781351, 8409269148, 8408092408 ],\n \"samples_ts\": [ 15.2113, 15.2213, 15.2234 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T13:59:52Z", + "avg_ns": 12906931517, + "stddev_ns": 2313595, + "avg_ts": 39.668608, + "stddev_ts": 0.00711, + "samples_ns": [ + 12909602999, + 12905606419, + 12905585133 + ], + "samples_ts": [ + 39.6604, + 39.6727, + 39.6727 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T14:00:43Z", + "avg_ns": 8410714302, + "stddev_ns": 3572149, + "avg_ts": 15.218685, + "stddev_ts": 0.00646, + "samples_ns": [ + 8414781351, + 8409269148, + 8408092408 + ], + "samples_ts": [ + 15.2113, + 15.2213, + 15.2234 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 782 + }, + { + "timestamp_utc": "2025-12-09T14:03:43.109985+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:01:10Z\",\n \"avg_ns\": 12911351913,\n \"stddev_ns\": 749420,\n \"avg_ts\": 39.655026,\n \"stddev_ts\": 0.002275,\n \"samples_ns\": [ 12911632845, 12910511785, 12911911110 ],\n \"samples_ts\": [ 39.6542, 39.6576, 39.6533 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:02:01Z\",\n \"avg_ns\": 33736190869,\n \"stddev_ns\": 3681061,\n \"avg_ts\": 15.176580,\n \"stddev_ts\": 0.001654,\n \"samples_ns\": [ 33740124302, 33735607107, 33732841199 ],\n \"samples_ts\": [ 15.1748, 15.1768, 15.1781 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T14:01:10Z", + "avg_ns": 12911351913, + "stddev_ns": 749420, + "avg_ts": 39.655026, + "stddev_ts": 0.002275, + "samples_ns": [ + 12911632845, + 12910511785, + 12911911110 + ], + "samples_ts": [ + 39.6542, + 39.6576, + 39.6533 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T14:02:01Z", + "avg_ns": 33736190869, + "stddev_ns": 3681061, + "avg_ts": 15.17658, + "stddev_ts": 0.001654, + "samples_ns": [ + 33740124302, + 33735607107, + 33732841199 + ], + "samples_ts": [ + 15.1748, + 15.1768, + 15.1781 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 783 + }, + { + "timestamp_utc": "2025-12-09T14:04:22.069545+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:03:44Z\",\n \"avg_ns\": 3205073478,\n \"stddev_ns\": 267460,\n \"avg_ts\": 39.936682,\n \"stddev_ts\": 0.003257,\n \"samples_ns\": [ 3204873091, 3205369155, 3204978189 ],\n \"samples_ts\": [ 39.9392, 39.933, 39.9379 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:03:56Z\",\n \"avg_ns\": 8331283704,\n \"stddev_ns\": 5552249,\n \"avg_ts\": 15.363783,\n \"stddev_ts\": 0.010240,\n \"samples_ns\": [ 8336544787, 8325480115, 8331826210 ],\n \"samples_ts\": [ 15.3541, 15.3745, 15.3628 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T14:03:44Z", + "avg_ns": 3205073478, + "stddev_ns": 267460, + "avg_ts": 39.936682, + "stddev_ts": 0.003257, + "samples_ns": [ + 3204873091, + 3205369155, + 3204978189 + ], + "samples_ts": [ + 39.9392, + 39.933, + 39.9379 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T14:03:56Z", + "avg_ns": 8331283704, + "stddev_ns": 5552249, + "avg_ts": 15.363783, + "stddev_ts": 0.01024, + "samples_ns": [ + 8336544787, + 8325480115, + 8331826210 + ], + "samples_ts": [ + 15.3541, + 15.3745, + 15.3628 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 784 + }, + { + "timestamp_utc": "2025-12-09T14:06:16.876395+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:04:23Z\",\n \"avg_ns\": 3203324882,\n \"stddev_ns\": 187313,\n \"avg_ts\": 39.958482,\n \"stddev_ts\": 0.002337,\n \"samples_ns\": [ 3203338980, 3203504748, 3203130918 ],\n \"samples_ts\": [ 39.9583, 39.9562, 39.9609 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:04:35Z\",\n \"avg_ns\": 33614816694,\n \"stddev_ns\": 14559129,\n \"avg_ts\": 15.231381,\n \"stddev_ts\": 0.006598,\n \"samples_ns\": [ 33623973378, 33598029708, 33622446997 ],\n \"samples_ts\": [ 15.2272, 15.239, 15.2279 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T14:04:23Z", + "avg_ns": 3203324882, + "stddev_ns": 187313, + "avg_ts": 39.958482, + "stddev_ts": 0.002337, + "samples_ns": [ + 3203338980, + 3203504748, + 3203130918 + ], + "samples_ts": [ + 39.9583, + 39.9562, + 39.9609 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T14:04:35Z", + "avg_ns": 33614816694, + "stddev_ns": 14559129, + "avg_ts": 15.231381, + "stddev_ts": 0.006598, + "samples_ns": [ + 33623973378, + 33598029708, + 33622446997 + ], + "samples_ts": [ + 15.2272, + 15.239, + 15.2279 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 785 + }, + { + "timestamp_utc": "2025-12-09T14:07:34.777890+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:06:17Z\",\n \"avg_ns\": 12932797034,\n \"stddev_ns\": 1489188,\n \"avg_ts\": 39.589271,\n \"stddev_ts\": 0.004558,\n \"samples_ns\": [ 12932129039, 12934503263, 12931758800 ],\n \"samples_ts\": [ 39.5913, 39.584, 39.5924 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:07:09Z\",\n \"avg_ns\": 8340343850,\n \"stddev_ns\": 979488,\n \"avg_ts\": 15.347089,\n \"stddev_ts\": 0.001787,\n \"samples_ns\": [ 8340278639, 8341345749, 8339407164 ],\n \"samples_ts\": [ 15.3472, 15.3452, 15.3488 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T14:06:17Z", + "avg_ns": 12932797034, + "stddev_ns": 1489188, + "avg_ts": 39.589271, + "stddev_ts": 0.004558, + "samples_ns": [ + 12932129039, + 12934503263, + 12931758800 + ], + "samples_ts": [ + 39.5913, + 39.584, + 39.5924 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T14:07:09Z", + "avg_ns": 8340343850, + "stddev_ns": 979488, + "avg_ts": 15.347089, + "stddev_ts": 0.001787, + "samples_ns": [ + 8340278639, + 8341345749, + 8339407164 + ], + "samples_ts": [ + 15.3472, + 15.3452, + 15.3488 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 786 + }, + { + "timestamp_utc": "2025-12-09T14:10:08.736829+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:07:35Z\",\n \"avg_ns\": 12937456020,\n \"stddev_ns\": 3477362,\n \"avg_ts\": 39.575016,\n \"stddev_ts\": 0.010636,\n \"samples_ns\": [ 12941287978, 12934501233, 12936578849 ],\n \"samples_ts\": [ 39.5633, 39.5841, 39.5777 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:08:27Z\",\n \"avg_ns\": 33687651114,\n \"stddev_ns\": 7480618,\n \"avg_ts\": 15.198448,\n \"stddev_ts\": 0.003375,\n \"samples_ns\": [ 33693196724, 33690613665, 33679142953 ],\n \"samples_ts\": [ 15.1959, 15.1971, 15.2023 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T14:07:35Z", + "avg_ns": 12937456020, + "stddev_ns": 3477362, + "avg_ts": 39.575016, + "stddev_ts": 0.010636, + "samples_ns": [ + 12941287978, + 12934501233, + 12936578849 + ], + "samples_ts": [ + 39.5633, + 39.5841, + 39.5777 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T14:08:27Z", + "avg_ns": 33687651114, + "stddev_ns": 7480618, + "avg_ts": 15.198448, + "stddev_ts": 0.003375, + "samples_ns": [ + 33693196724, + 33690613665, + 33679142953 + ], + "samples_ts": [ + 15.1959, + 15.1971, + 15.2023 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 787 + }, + { + "timestamp_utc": "2025-12-09T14:10:47.773907+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:10:09Z\",\n \"avg_ns\": 3203467953,\n \"stddev_ns\": 224377,\n \"avg_ts\": 39.956698,\n \"stddev_ts\": 0.002614,\n \"samples_ns\": [ 3203689287, 3203272451, 3203442123 ],\n \"samples_ts\": [ 39.9539, 39.9591, 39.957 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:10:22Z\",\n \"avg_ns\": 8344326226,\n \"stddev_ns\": 6851209,\n \"avg_ts\": 15.339771,\n \"stddev_ts\": 0.012589,\n \"samples_ns\": [ 8340316875, 8352237076, 8340424727 ],\n \"samples_ts\": [ 15.3471, 15.3252, 15.3469 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T14:10:09Z", + "avg_ns": 3203467953, + "stddev_ns": 224377, + "avg_ts": 39.956698, + "stddev_ts": 0.002614, + "samples_ns": [ + 3203689287, + 3203272451, + 3203442123 + ], + "samples_ts": [ + 39.9539, + 39.9591, + 39.957 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T14:10:22Z", + "avg_ns": 8344326226, + "stddev_ns": 6851209, + "avg_ts": 15.339771, + "stddev_ts": 0.012589, + "samples_ns": [ + 8340316875, + 8352237076, + 8340424727 + ], + "samples_ts": [ + 15.3471, + 15.3252, + 15.3469 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 788 + }, + { + "timestamp_utc": "2025-12-09T14:12:43.307732+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:10:48Z\",\n \"avg_ns\": 3201567158,\n \"stddev_ns\": 164724,\n \"avg_ts\": 39.980420,\n \"stddev_ts\": 0.001932,\n \"samples_ns\": [ 3201665438, 3201647202, 3201388835 ],\n \"samples_ts\": [ 39.9792, 39.9794, 39.9826 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:11:01Z\",\n \"avg_ns\": 33859155449,\n \"stddev_ns\": 16761405,\n \"avg_ts\": 15.121466,\n \"stddev_ts\": 0.007484,\n \"samples_ns\": [ 33878417667, 33851158336, 33847890344 ],\n \"samples_ts\": [ 15.1129, 15.125, 15.1265 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T14:10:48Z", + "avg_ns": 3201567158, + "stddev_ns": 164724, + "avg_ts": 39.98042, + "stddev_ts": 0.001932, + "samples_ns": [ + 3201665438, + 3201647202, + 3201388835 + ], + "samples_ts": [ + 39.9792, + 39.9794, + 39.9826 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T14:11:01Z", + "avg_ns": 33859155449, + "stddev_ns": 16761405, + "avg_ts": 15.121466, + "stddev_ts": 0.007484, + "samples_ns": [ + 33878417667, + 33851158336, + 33847890344 + ], + "samples_ts": [ + 15.1129, + 15.125, + 15.1265 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 789 + }, + { + "timestamp_utc": "2025-12-09T14:14:01.988995+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:12:44Z\",\n \"avg_ns\": 13141329446,\n \"stddev_ns\": 650987,\n \"avg_ts\": 38.961051,\n \"stddev_ts\": 0.001869,\n \"samples_ns\": [ 13140634930, 13141487661, 13141865749 ],\n \"samples_ts\": [ 38.9631, 38.9606, 38.9595 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:13:36Z\",\n \"avg_ns\": 8317873143,\n \"stddev_ns\": 2102155,\n \"avg_ts\": 15.388550,\n \"stddev_ts\": 0.003881,\n \"samples_ns\": [ 8317354753, 8316082735, 8320181943 ],\n \"samples_ts\": [ 15.3895, 15.3919, 15.3843 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T14:12:44Z", + "avg_ns": 13141329446, + "stddev_ns": 650987, + "avg_ts": 38.961051, + "stddev_ts": 0.001869, + "samples_ns": [ + 13140634930, + 13141487661, + 13141865749 + ], + "samples_ts": [ + 38.9631, + 38.9606, + 38.9595 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T14:13:36Z", + "avg_ns": 8317873143, + "stddev_ns": 2102155, + "avg_ts": 15.38855, + "stddev_ts": 0.003881, + "samples_ns": [ + 8317354753, + 8316082735, + 8320181943 + ], + "samples_ts": [ + 15.3895, + 15.3919, + 15.3843 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 790 + }, + { + "timestamp_utc": "2025-12-09T14:16:37.506704+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:14:02Z\",\n \"avg_ns\": 13145794172,\n \"stddev_ns\": 2877298,\n \"avg_ts\": 38.947819,\n \"stddev_ts\": 0.008510,\n \"samples_ns\": [ 13149033096, 13143554560, 13144794862 ],\n \"samples_ts\": [ 38.9382, 38.9545, 38.9508 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:14:55Z\",\n \"avg_ns\": 33927502723,\n \"stddev_ns\": 5146920,\n \"avg_ts\": 15.091002,\n \"stddev_ts\": 0.002286,\n \"samples_ns\": [ 33925405637, 33933360072, 33923742462 ],\n \"samples_ts\": [ 15.0919, 15.0884, 15.0927 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T14:14:02Z", + "avg_ns": 13145794172, + "stddev_ns": 2877298, + "avg_ts": 38.947819, + "stddev_ts": 0.00851, + "samples_ns": [ + 13149033096, + 13143554560, + 13144794862 + ], + "samples_ts": [ + 38.9382, + 38.9545, + 38.9508 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T14:14:55Z", + "avg_ns": 33927502723, + "stddev_ns": 5146920, + "avg_ts": 15.091002, + "stddev_ts": 0.002286, + "samples_ns": [ + 33925405637, + 33933360072, + 33923742462 + ], + "samples_ts": [ + 15.0919, + 15.0884, + 15.0927 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 791 + }, + { + "timestamp_utc": "2025-12-09T14:17:11.289159+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:16:38Z\",\n \"avg_ns\": 2207512967,\n \"stddev_ns\": 1673964,\n \"avg_ts\": 57.983827,\n \"stddev_ts\": 0.043917,\n \"samples_ns\": [ 2209421655, 2206302838, 2206814410 ],\n \"samples_ts\": [ 57.9337, 58.0156, 58.0022 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:16:47Z\",\n \"avg_ns\": 7929725840,\n \"stddev_ns\": 7158562,\n \"avg_ts\": 16.141803,\n \"stddev_ts\": 0.014565,\n \"samples_ns\": [ 7926122418, 7937970107, 7925084995 ],\n \"samples_ts\": [ 16.1491, 16.125, 16.1512 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T14:16:38Z", + "avg_ns": 2207512967, + "stddev_ns": 1673964, + "avg_ts": 57.983827, + "stddev_ts": 0.043917, + "samples_ns": [ + 2209421655, + 2206302838, + 2206814410 + ], + "samples_ts": [ + 57.9337, + 58.0156, + 58.0022 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T14:16:47Z", + "avg_ns": 7929725840, + "stddev_ns": 7158562, + "avg_ts": 16.141803, + "stddev_ts": 0.014565, + "samples_ns": [ + 7926122418, + 7937970107, + 7925084995 + ], + "samples_ts": [ + 16.1491, + 16.125, + 16.1512 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 792 + }, + { + "timestamp_utc": "2025-12-09T14:18:57.539277+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:17:12Z\",\n \"avg_ns\": 2204686031,\n \"stddev_ns\": 379107,\n \"avg_ts\": 58.058155,\n \"stddev_ts\": 0.009984,\n \"samples_ns\": [ 2204728218, 2204287595, 2205042280 ],\n \"samples_ts\": [ 58.057, 58.0686, 58.0488 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:17:21Z\",\n \"avg_ns\": 32079330823,\n \"stddev_ns\": 52446493,\n \"avg_ts\": 15.960461,\n \"stddev_ts\": 0.026111,\n \"samples_ns\": [ 32122617180, 32094366701, 32021008588 ],\n \"samples_ts\": [ 15.9389, 15.953, 15.9895 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T14:17:12Z", + "avg_ns": 2204686031, + "stddev_ns": 379107, + "avg_ts": 58.058155, + "stddev_ts": 0.009984, + "samples_ns": [ + 2204728218, + 2204287595, + 2205042280 + ], + "samples_ts": [ + 58.057, + 58.0686, + 58.0488 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T14:17:21Z", + "avg_ns": 32079330823, + "stddev_ns": 52446493, + "avg_ts": 15.960461, + "stddev_ts": 0.026111, + "samples_ns": [ + 32122617180, + 32094366701, + 32021008588 + ], + "samples_ts": [ + 15.9389, + 15.953, + 15.9895 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 793 + }, + { + "timestamp_utc": "2025-12-09T14:19:58.040780+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:18:58Z\",\n \"avg_ns\": 8872068291,\n \"stddev_ns\": 5543724,\n \"avg_ts\": 57.709219,\n \"stddev_ts\": 0.036039,\n \"samples_ns\": [ 8867696530, 8878301507, 8870206838 ],\n \"samples_ts\": [ 57.7377, 57.6687, 57.7213 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:19:34Z\",\n \"avg_ns\": 7959716308,\n \"stddev_ns\": 530619,\n \"avg_ts\": 16.080975,\n \"stddev_ts\": 0.001041,\n \"samples_ns\": [ 7959641852, 7959242186, 7960264888 ],\n \"samples_ts\": [ 16.0811, 16.0819, 16.0799 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T14:18:58Z", + "avg_ns": 8872068291, + "stddev_ns": 5543724, + "avg_ts": 57.709219, + "stddev_ts": 0.036039, + "samples_ns": [ + 8867696530, + 8878301507, + 8870206838 + ], + "samples_ts": [ + 57.7377, + 57.6687, + 57.7213 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T14:19:34Z", + "avg_ns": 7959716308, + "stddev_ns": 530619, + "avg_ts": 16.080975, + "stddev_ts": 0.001041, + "samples_ns": [ + 7959641852, + 7959242186, + 7960264888 + ], + "samples_ts": [ + 16.0811, + 16.0819, + 16.0799 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 794 + }, + { + "timestamp_utc": "2025-12-09T14:22:11.030102+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:19:59Z\",\n \"avg_ns\": 8876333939,\n \"stddev_ns\": 18136911,\n \"avg_ts\": 57.681632,\n \"stddev_ts\": 0.117722,\n \"samples_ns\": [ 8864870191, 8866887686, 8897243941 ],\n \"samples_ts\": [ 57.7561, 57.7429, 57.5459 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:20:34Z\",\n \"avg_ns\": 32109879031,\n \"stddev_ns\": 24764893,\n \"avg_ts\": 15.945255,\n \"stddev_ts\": 0.012292,\n \"samples_ns\": [ 32138053506, 32100020860, 32091562729 ],\n \"samples_ts\": [ 15.9313, 15.9501, 15.9543 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T14:19:59Z", + "avg_ns": 8876333939, + "stddev_ns": 18136911, + "avg_ts": 57.681632, + "stddev_ts": 0.117722, + "samples_ns": [ + 8864870191, + 8866887686, + 8897243941 + ], + "samples_ts": [ + 57.7561, + 57.7429, + 57.5459 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T14:20:34Z", + "avg_ns": 32109879031, + "stddev_ns": 24764893, + "avg_ts": 15.945255, + "stddev_ts": 0.012292, + "samples_ns": [ + 32138053506, + 32100020860, + 32091562729 + ], + "samples_ts": [ + 15.9313, + 15.9501, + 15.9543 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 795 + }, + { + "timestamp_utc": "2025-12-09T14:22:44.861896+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:22:12Z\",\n \"avg_ns\": 2206999251,\n \"stddev_ns\": 239406,\n \"avg_ts\": 57.997301,\n \"stddev_ts\": 0.006169,\n \"samples_ns\": [ 2207036287, 2207213284, 2206748183 ],\n \"samples_ts\": [ 57.9963, 57.9917, 58.0039 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:22:20Z\",\n \"avg_ns\": 7955939778,\n \"stddev_ns\": 7821196,\n \"avg_ts\": 16.088619,\n \"stddev_ts\": 0.015817,\n \"samples_ns\": [ 7956091194, 7948043973, 7963684167 ],\n \"samples_ts\": [ 16.0883, 16.1046, 16.073 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T14:22:12Z", + "avg_ns": 2206999251, + "stddev_ns": 239406, + "avg_ts": 57.997301, + "stddev_ts": 0.006169, + "samples_ns": [ + 2207036287, + 2207213284, + 2206748183 + ], + "samples_ts": [ + 57.9963, + 57.9917, + 58.0039 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T14:22:20Z", + "avg_ns": 7955939778, + "stddev_ns": 7821196, + "avg_ts": 16.088619, + "stddev_ts": 0.015817, + "samples_ns": [ + 7956091194, + 7948043973, + 7963684167 + ], + "samples_ts": [ + 16.0883, + 16.1046, + 16.073 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 796 + }, + { + "timestamp_utc": "2025-12-09T14:24:31.097235+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:22:45Z\",\n \"avg_ns\": 2206383971,\n \"stddev_ns\": 576728,\n \"avg_ts\": 58.013477,\n \"stddev_ts\": 0.015164,\n \"samples_ns\": [ 2206980005, 2205828713, 2206343195 ],\n \"samples_ts\": [ 57.9978, 58.0281, 58.0145 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:22:54Z\",\n \"avg_ns\": 32088661910,\n \"stddev_ns\": 14897546,\n \"avg_ts\": 15.955794,\n \"stddev_ts\": 0.007406,\n \"samples_ns\": [ 32105805783, 32081315781, 32078864166 ],\n \"samples_ts\": [ 15.9473, 15.9594, 15.9607 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T14:22:45Z", + "avg_ns": 2206383971, + "stddev_ns": 576728, + "avg_ts": 58.013477, + "stddev_ts": 0.015164, + "samples_ns": [ + 2206980005, + 2205828713, + 2206343195 + ], + "samples_ts": [ + 57.9978, + 58.0281, + 58.0145 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T14:22:54Z", + "avg_ns": 32088661910, + "stddev_ns": 14897546, + "avg_ts": 15.955794, + "stddev_ts": 0.007406, + "samples_ns": [ + 32105805783, + 32081315781, + 32078864166 + ], + "samples_ts": [ + 15.9473, + 15.9594, + 15.9607 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 797 + }, + { + "timestamp_utc": "2025-12-09T14:25:31.680758+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:24:32Z\",\n \"avg_ns\": 8878095563,\n \"stddev_ns\": 3910458,\n \"avg_ts\": 57.670033,\n \"stddev_ts\": 0.025389,\n \"samples_ns\": [ 8878736123, 8881643900, 8873906668 ],\n \"samples_ts\": [ 57.6659, 57.647, 57.6972 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:25:07Z\",\n \"avg_ns\": 7960182539,\n \"stddev_ns\": 4143793,\n \"avg_ts\": 16.080036,\n \"stddev_ts\": 0.008369,\n \"samples_ns\": [ 7964246071, 7960336831, 7955964716 ],\n \"samples_ts\": [ 16.0718, 16.0797, 16.0886 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T14:24:32Z", + "avg_ns": 8878095563, + "stddev_ns": 3910458, + "avg_ts": 57.670033, + "stddev_ts": 0.025389, + "samples_ns": [ + 8878736123, + 8881643900, + 8873906668 + ], + "samples_ts": [ + 57.6659, + 57.647, + 57.6972 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T14:25:07Z", + "avg_ns": 7960182539, + "stddev_ns": 4143793, + "avg_ts": 16.080036, + "stddev_ts": 0.008369, + "samples_ns": [ + 7964246071, + 7960336831, + 7955964716 + ], + "samples_ts": [ + 16.0718, + 16.0797, + 16.0886 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 798 + }, + { + "timestamp_utc": "2025-12-09T14:27:45.127231+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:25:32Z\",\n \"avg_ns\": 8875567966,\n \"stddev_ns\": 2896547,\n \"avg_ts\": 57.686453,\n \"stddev_ts\": 0.018823,\n \"samples_ns\": [ 8874565628, 8878832552, 8873305718 ],\n \"samples_ts\": [ 57.693, 57.6652, 57.7012 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:26:08Z\",\n \"avg_ns\": 32264726894,\n \"stddev_ns\": 5837779,\n \"avg_ts\": 15.868723,\n \"stddev_ts\": 0.002870,\n \"samples_ns\": [ 32266434886, 32269517319, 32258228478 ],\n \"samples_ts\": [ 15.8679, 15.8664, 15.8719 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T14:25:32Z", + "avg_ns": 8875567966, + "stddev_ns": 2896547, + "avg_ts": 57.686453, + "stddev_ts": 0.018823, + "samples_ns": [ + 8874565628, + 8878832552, + 8873305718 + ], + "samples_ts": [ + 57.693, + 57.6652, + 57.7012 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T14:26:08Z", + "avg_ns": 32264726894, + "stddev_ns": 5837779, + "avg_ts": 15.868723, + "stddev_ts": 0.00287, + "samples_ns": [ + 32266434886, + 32269517319, + 32258228478 + ], + "samples_ts": [ + 15.8679, + 15.8664, + 15.8719 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 799 + }, + { + "timestamp_utc": "2025-12-09T14:28:18.986803+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:27:46Z\",\n \"avg_ns\": 2204303485,\n \"stddev_ns\": 1161952,\n \"avg_ts\": 58.068240,\n \"stddev_ts\": 0.030607,\n \"samples_ns\": [ 2204176893, 2203210012, 2205523550 ],\n \"samples_ts\": [ 58.0716, 58.097, 58.0361 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:27:54Z\",\n \"avg_ns\": 7961093977,\n \"stddev_ns\": 7289634,\n \"avg_ts\": 16.078201,\n \"stddev_ts\": 0.014714,\n \"samples_ns\": [ 7958695808, 7969279438, 7955306687 ],\n \"samples_ts\": [ 16.083, 16.0617, 16.0899 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T14:27:46Z", + "avg_ns": 2204303485, + "stddev_ns": 1161952, + "avg_ts": 58.06824, + "stddev_ts": 0.030607, + "samples_ns": [ + 2204176893, + 2203210012, + 2205523550 + ], + "samples_ts": [ + 58.0716, + 58.097, + 58.0361 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T14:27:54Z", + "avg_ns": 7961093977, + "stddev_ns": 7289634, + "avg_ts": 16.078201, + "stddev_ts": 0.014714, + "samples_ns": [ + 7958695808, + 7969279438, + 7955306687 + ], + "samples_ts": [ + 16.083, + 16.0617, + 16.0899 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 800 + }, + { + "timestamp_utc": "2025-12-09T14:30:04.983636+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:28:19Z\",\n \"avg_ns\": 2204795804,\n \"stddev_ns\": 343694,\n \"avg_ts\": 58.055264,\n \"stddev_ts\": 0.008879,\n \"samples_ns\": [ 2205146705, 2204766533, 2204474176 ],\n \"samples_ts\": [ 58.046, 58.056, 58.0637 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:28:28Z\",\n \"avg_ns\": 32003679016,\n \"stddev_ns\": 21989078,\n \"avg_ts\": 15.998166,\n \"stddev_ts\": 0.010987,\n \"samples_ns\": [ 32028944119, 31993224422, 31988868508 ],\n \"samples_ts\": [ 15.9855, 16.0034, 16.0056 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T14:28:19Z", + "avg_ns": 2204795804, + "stddev_ns": 343694, + "avg_ts": 58.055264, + "stddev_ts": 0.008879, + "samples_ns": [ + 2205146705, + 2204766533, + 2204474176 + ], + "samples_ts": [ + 58.046, + 58.056, + 58.0637 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T14:28:28Z", + "avg_ns": 32003679016, + "stddev_ns": 21989078, + "avg_ts": 15.998166, + "stddev_ts": 0.010987, + "samples_ns": [ + 32028944119, + 31993224422, + 31988868508 + ], + "samples_ts": [ + 15.9855, + 16.0034, + 16.0056 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 801 + }, + { + "timestamp_utc": "2025-12-09T14:31:06.321042+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:30:05Z\",\n \"avg_ns\": 9069210350,\n \"stddev_ns\": 5316900,\n \"avg_ts\": 56.454763,\n \"stddev_ts\": 0.033104,\n \"samples_ns\": [ 9073750715, 9070519029, 9063361306 ],\n \"samples_ts\": [ 56.4265, 56.4466, 56.4912 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:30:42Z\",\n \"avg_ns\": 7964512742,\n \"stddev_ns\": 1151952,\n \"avg_ts\": 16.071291,\n \"stddev_ts\": 0.002311,\n \"samples_ns\": [ 7964572114, 7963339194, 7965626920 ],\n \"samples_ts\": [ 16.0712, 16.0737, 16.069 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T14:30:05Z", + "avg_ns": 9069210350, + "stddev_ns": 5316900, + "avg_ts": 56.454763, + "stddev_ts": 0.033104, + "samples_ns": [ + 9073750715, + 9070519029, + 9063361306 + ], + "samples_ts": [ + 56.4265, + 56.4466, + 56.4912 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T14:30:42Z", + "avg_ns": 7964512742, + "stddev_ns": 1151952, + "avg_ts": 16.071291, + "stddev_ts": 0.002311, + "samples_ns": [ + 7964572114, + 7963339194, + 7965626920 + ], + "samples_ts": [ + 16.0712, + 16.0737, + 16.069 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 802 + }, + { + "timestamp_utc": "2025-12-09T14:33:20.465884+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:31:07Z\",\n \"avg_ns\": 9064604509,\n \"stddev_ns\": 654874,\n \"avg_ts\": 56.483435,\n \"stddev_ts\": 0.004081,\n \"samples_ns\": [ 9063995662, 9064520551, 9065297314 ],\n \"samples_ts\": [ 56.4872, 56.484, 56.4791 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:31:43Z\",\n \"avg_ns\": 32245000768,\n \"stddev_ns\": 6853404,\n \"avg_ts\": 15.878431,\n \"stddev_ts\": 0.003375,\n \"samples_ns\": [ 32242944400, 32252646933, 32239410971 ],\n \"samples_ts\": [ 15.8794, 15.8747, 15.8812 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T14:31:07Z", + "avg_ns": 9064604509, + "stddev_ns": 654874, + "avg_ts": 56.483435, + "stddev_ts": 0.004081, + "samples_ns": [ + 9063995662, + 9064520551, + 9065297314 + ], + "samples_ts": [ + 56.4872, + 56.484, + 56.4791 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T14:31:43Z", + "avg_ns": 32245000768, + "stddev_ns": 6853404, + "avg_ts": 15.878431, + "stddev_ts": 0.003375, + "samples_ns": [ + 32242944400, + 32252646933, + 32239410971 + ], + "samples_ts": [ + 15.8794, + 15.8747, + 15.8812 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 803 + }, + { + "timestamp_utc": "2025-12-09T14:33:54.439879+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:33:21Z\",\n \"avg_ns\": 2203004015,\n \"stddev_ns\": 748764,\n \"avg_ts\": 58.102486,\n \"stddev_ts\": 0.019674,\n \"samples_ns\": [ 2202153126, 2203544490, 2203314431 ],\n \"samples_ts\": [ 58.1249, 58.0882, 58.0943 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:33:30Z\",\n \"avg_ns\": 7990634749,\n \"stddev_ns\": 9044458,\n \"avg_ts\": 16.018766,\n \"stddev_ts\": 0.018119,\n \"samples_ns\": [ 8000990910, 7984295332, 7986618007 ],\n \"samples_ts\": [ 15.998, 16.0315, 16.0268 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T14:33:21Z", + "avg_ns": 2203004015, + "stddev_ns": 748764, + "avg_ts": 58.102486, + "stddev_ts": 0.019674, + "samples_ns": [ + 2202153126, + 2203544490, + 2203314431 + ], + "samples_ts": [ + 58.1249, + 58.0882, + 58.0943 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T14:33:30Z", + "avg_ns": 7990634749, + "stddev_ns": 9044458, + "avg_ts": 16.018766, + "stddev_ts": 0.018119, + "samples_ns": [ + 8000990910, + 7984295332, + 7986618007 + ], + "samples_ts": [ + 15.998, + 16.0315, + 16.0268 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 804 + }, + { + "timestamp_utc": "2025-12-09T14:35:40.644629+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:33:55Z\",\n \"avg_ns\": 2202466618,\n \"stddev_ns\": 5520640,\n \"avg_ts\": 58.116902,\n \"stddev_ts\": 0.145566,\n \"samples_ns\": [ 2197610845, 2201317694, 2208471315 ],\n \"samples_ts\": [ 58.2451, 58.147, 57.9586 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:34:04Z\",\n \"avg_ns\": 32070962558,\n \"stddev_ns\": 72202496,\n \"avg_ts\": 15.964651,\n \"stddev_ts\": 0.035988,\n \"samples_ns\": [ 31987676423, 32115886669, 32109324582 ],\n \"samples_ts\": [ 16.0062, 15.9423, 15.9455 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T14:33:55Z", + "avg_ns": 2202466618, + "stddev_ns": 5520640, + "avg_ts": 58.116902, + "stddev_ts": 0.145566, + "samples_ns": [ + 2197610845, + 2201317694, + 2208471315 + ], + "samples_ts": [ + 58.2451, + 58.147, + 57.9586 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T14:34:04Z", + "avg_ns": 32070962558, + "stddev_ns": 72202496, + "avg_ts": 15.964651, + "stddev_ts": 0.035988, + "samples_ns": [ + 31987676423, + 32115886669, + 32109324582 + ], + "samples_ts": [ + 16.0062, + 15.9423, + 15.9455 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 805 + }, + { + "timestamp_utc": "2025-12-09T14:36:41.147172+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:35:41Z\",\n \"avg_ns\": 8870536730,\n \"stddev_ns\": 2031337,\n \"avg_ts\": 57.719170,\n \"stddev_ts\": 0.013204,\n \"samples_ns\": [ 8868310480, 8872282564, 8871017147 ],\n \"samples_ts\": [ 57.7337, 57.7078, 57.716 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:36:17Z\",\n \"avg_ns\": 7954137726,\n \"stddev_ns\": 901686,\n \"avg_ts\": 16.092254,\n \"stddev_ts\": 0.001815,\n \"samples_ns\": [ 7954892929, 7953145848, 7954374402 ],\n \"samples_ts\": [ 16.0907, 16.0943, 16.0918 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T14:35:41Z", + "avg_ns": 8870536730, + "stddev_ns": 2031337, + "avg_ts": 57.71917, + "stddev_ts": 0.013204, + "samples_ns": [ + 8868310480, + 8872282564, + 8871017147 + ], + "samples_ts": [ + 57.7337, + 57.7078, + 57.716 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T14:36:17Z", + "avg_ns": 7954137726, + "stddev_ns": 901686, + "avg_ts": 16.092254, + "stddev_ts": 0.001815, + "samples_ns": [ + 7954892929, + 7953145848, + 7954374402 + ], + "samples_ts": [ + 16.0907, + 16.0943, + 16.0918 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 806 + }, + { + "timestamp_utc": "2025-12-09T14:38:54.234469+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:36:42Z\",\n \"avg_ns\": 8873897681,\n \"stddev_ns\": 3670695,\n \"avg_ts\": 57.697314,\n \"stddev_ts\": 0.023861,\n \"samples_ns\": [ 8878130398, 8871588755, 8871973890 ],\n \"samples_ts\": [ 57.6698, 57.7123, 57.7098 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:37:17Z\",\n \"avg_ns\": 32149260048,\n \"stddev_ns\": 15984354,\n \"avg_ts\": 15.925719,\n \"stddev_ts\": 0.007917,\n \"samples_ns\": [ 32134576770, 32166285099, 32146918276 ],\n \"samples_ts\": [ 15.933, 15.9173, 15.9269 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T14:36:42Z", + "avg_ns": 8873897681, + "stddev_ns": 3670695, + "avg_ts": 57.697314, + "stddev_ts": 0.023861, + "samples_ns": [ + 8878130398, + 8871588755, + 8871973890 + ], + "samples_ts": [ + 57.6698, + 57.7123, + 57.7098 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T14:37:17Z", + "avg_ns": 32149260048, + "stddev_ns": 15984354, + "avg_ts": 15.925719, + "stddev_ts": 0.007917, + "samples_ns": [ + 32134576770, + 32166285099, + 32146918276 + ], + "samples_ts": [ + 15.933, + 15.9173, + 15.9269 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 807 + }, + { + "timestamp_utc": "2025-12-09T14:39:28.000849+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:38:55Z\",\n \"avg_ns\": 2206319327,\n \"stddev_ns\": 3948561,\n \"avg_ts\": 58.015298,\n \"stddev_ts\": 0.103709,\n \"samples_ns\": [ 2203739608, 2204354113, 2210864262 ],\n \"samples_ts\": [ 58.0831, 58.0669, 57.8959 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:39:04Z\",\n \"avg_ns\": 7933045660,\n \"stddev_ns\": 8979992,\n \"avg_ts\": 16.135053,\n \"stddev_ts\": 0.018276,\n \"samples_ns\": [ 7937825754, 7938624508, 7922686718 ],\n \"samples_ts\": [ 16.1253, 16.1237, 16.1561 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T14:38:55Z", + "avg_ns": 2206319327, + "stddev_ns": 3948561, + "avg_ts": 58.015298, + "stddev_ts": 0.103709, + "samples_ns": [ + 2203739608, + 2204354113, + 2210864262 + ], + "samples_ts": [ + 58.0831, + 58.0669, + 57.8959 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T14:39:04Z", + "avg_ns": 7933045660, + "stddev_ns": 8979992, + "avg_ts": 16.135053, + "stddev_ts": 0.018276, + "samples_ns": [ + 7937825754, + 7938624508, + 7922686718 + ], + "samples_ts": [ + 16.1253, + 16.1237, + 16.1561 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 808 + }, + { + "timestamp_utc": "2025-12-09T14:41:14.089380+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:39:29Z\",\n \"avg_ns\": 2200124377,\n \"stddev_ns\": 1000409,\n \"avg_ts\": 58.178537,\n \"stddev_ts\": 0.026461,\n \"samples_ns\": [ 2200815830, 2198977252, 2200580049 ],\n \"samples_ts\": [ 58.1603, 58.2089, 58.1665 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:39:37Z\",\n \"avg_ns\": 32039154046,\n \"stddev_ns\": 39638332,\n \"avg_ts\": 15.980463,\n \"stddev_ts\": 0.019777,\n \"samples_ns\": [ 31996439286, 32046273440, 32074749414 ],\n \"samples_ts\": [ 16.0018, 15.9769, 15.9627 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T14:39:29Z", + "avg_ns": 2200124377, + "stddev_ns": 1000409, + "avg_ts": 58.178537, + "stddev_ts": 0.026461, + "samples_ns": [ + 2200815830, + 2198977252, + 2200580049 + ], + "samples_ts": [ + 58.1603, + 58.2089, + 58.1665 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T14:39:37Z", + "avg_ns": 32039154046, + "stddev_ns": 39638332, + "avg_ts": 15.980463, + "stddev_ts": 0.019777, + "samples_ns": [ + 31996439286, + 32046273440, + 32074749414 + ], + "samples_ts": [ + 16.0018, + 15.9769, + 15.9627 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 809 + }, + { + "timestamp_utc": "2025-12-09T14:42:14.525285+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:41:15Z\",\n \"avg_ns\": 8887140288,\n \"stddev_ns\": 4070362,\n \"avg_ts\": 57.611341,\n \"stddev_ts\": 0.026379,\n \"samples_ns\": [ 8887049399, 8883117224, 8891254242 ],\n \"samples_ts\": [ 57.6119, 57.6374, 57.5847 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:41:50Z\",\n \"avg_ns\": 7917685818,\n \"stddev_ns\": 4601880,\n \"avg_ts\": 16.166344,\n \"stddev_ts\": 0.009394,\n \"samples_ns\": [ 7918666870, 7921716313, 7912674273 ],\n \"samples_ts\": [ 16.1643, 16.1581, 16.1766 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T14:41:15Z", + "avg_ns": 8887140288, + "stddev_ns": 4070362, + "avg_ts": 57.611341, + "stddev_ts": 0.026379, + "samples_ns": [ + 8887049399, + 8883117224, + 8891254242 + ], + "samples_ts": [ + 57.6119, + 57.6374, + 57.5847 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T14:41:50Z", + "avg_ns": 7917685818, + "stddev_ns": 4601880, + "avg_ts": 16.166344, + "stddev_ts": 0.009394, + "samples_ns": [ + 7918666870, + 7921716313, + 7912674273 + ], + "samples_ts": [ + 16.1643, + 16.1581, + 16.1766 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 810 + }, + { + "timestamp_utc": "2025-12-09T14:44:27.647927+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:42:15Z\",\n \"avg_ns\": 8874619479,\n \"stddev_ns\": 1191039,\n \"avg_ts\": 57.692615,\n \"stddev_ts\": 0.007718,\n \"samples_ns\": [ 8875990388, 8873921427, 8873946623 ],\n \"samples_ts\": [ 57.6837, 57.6972, 57.697 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:42:51Z\",\n \"avg_ns\": 32152490010,\n \"stddev_ns\": 8851922,\n \"avg_ts\": 15.924117,\n \"stddev_ts\": 0.004383,\n \"samples_ns\": [ 32162582147, 32146041048, 32148846835 ],\n \"samples_ts\": [ 15.9191, 15.9273, 15.9259 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T14:42:15Z", + "avg_ns": 8874619479, + "stddev_ns": 1191039, + "avg_ts": 57.692615, + "stddev_ts": 0.007718, + "samples_ns": [ + 8875990388, + 8873921427, + 8873946623 + ], + "samples_ts": [ + 57.6837, + 57.6972, + 57.697 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T14:42:51Z", + "avg_ns": 32152490010, + "stddev_ns": 8851922, + "avg_ts": 15.924117, + "stddev_ts": 0.004383, + "samples_ns": [ + 32162582147, + 32146041048, + 32148846835 + ], + "samples_ts": [ + 15.9191, + 15.9273, + 15.9259 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 811 + }, + { + "timestamp_utc": "2025-12-09T14:45:01.534008+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:44:28Z\",\n \"avg_ns\": 2192091840,\n \"stddev_ns\": 1099606,\n \"avg_ts\": 58.391724,\n \"stddev_ts\": 0.029265,\n \"samples_ns\": [ 2193166986, 2192137353, 2190971182 ],\n \"samples_ts\": [ 58.3631, 58.3905, 58.4216 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:44:37Z\",\n \"avg_ns\": 7980476060,\n \"stddev_ns\": 2413209,\n \"avg_ts\": 16.039144,\n \"stddev_ts\": 0.004848,\n \"samples_ns\": [ 7977692214, 7981810975, 7981924992 ],\n \"samples_ts\": [ 16.0447, 16.0365, 16.0362 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T14:44:28Z", + "avg_ns": 2192091840, + "stddev_ns": 1099606, + "avg_ts": 58.391724, + "stddev_ts": 0.029265, + "samples_ns": [ + 2193166986, + 2192137353, + 2190971182 + ], + "samples_ts": [ + 58.3631, + 58.3905, + 58.4216 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T14:44:37Z", + "avg_ns": 7980476060, + "stddev_ns": 2413209, + "avg_ts": 16.039144, + "stddev_ts": 0.004848, + "samples_ns": [ + 7977692214, + 7981810975, + 7981924992 + ], + "samples_ts": [ + 16.0447, + 16.0365, + 16.0362 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 812 + }, + { + "timestamp_utc": "2025-12-09T14:46:48.209819+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:45:02Z\",\n \"avg_ns\": 2206223132,\n \"stddev_ns\": 842289,\n \"avg_ts\": 58.017709,\n \"stddev_ts\": 0.022076,\n \"samples_ns\": [ 2205713075, 2207192248, 2205764075 ],\n \"samples_ts\": [ 58.0311, 57.9922, 58.0298 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:45:11Z\",\n \"avg_ns\": 32224410116,\n \"stddev_ns\": 3915271,\n \"avg_ts\": 15.888576,\n \"stddev_ts\": 0.001928,\n \"samples_ns\": [ 32220811485, 32228572590, 32223846274 ],\n \"samples_ts\": [ 15.8904, 15.8865, 15.8889 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T14:45:02Z", + "avg_ns": 2206223132, + "stddev_ns": 842289, + "avg_ts": 58.017709, + "stddev_ts": 0.022076, + "samples_ns": [ + 2205713075, + 2207192248, + 2205764075 + ], + "samples_ts": [ + 58.0311, + 57.9922, + 58.0298 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T14:45:11Z", + "avg_ns": 32224410116, + "stddev_ns": 3915271, + "avg_ts": 15.888576, + "stddev_ts": 0.001928, + "samples_ns": [ + 32220811485, + 32228572590, + 32223846274 + ], + "samples_ts": [ + 15.8904, + 15.8865, + 15.8889 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 813 + }, + { + "timestamp_utc": "2025-12-09T14:47:49.525782+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:46:49Z\",\n \"avg_ns\": 9056554823,\n \"stddev_ns\": 3525718,\n \"avg_ts\": 56.533645,\n \"stddev_ts\": 0.022005,\n \"samples_ns\": [ 9058562253, 9058616937, 9052485280 ],\n \"samples_ts\": [ 56.5211, 56.5208, 56.5591 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:47:25Z\",\n \"avg_ns\": 7968254449,\n \"stddev_ns\": 922573,\n \"avg_ts\": 16.063744,\n \"stddev_ts\": 0.001860,\n \"samples_ns\": [ 7969040874, 7967238907, 7968483566 ],\n \"samples_ts\": [ 16.0622, 16.0658, 16.0633 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T14:46:49Z", + "avg_ns": 9056554823, + "stddev_ns": 3525718, + "avg_ts": 56.533645, + "stddev_ts": 0.022005, + "samples_ns": [ + 9058562253, + 9058616937, + 9052485280 + ], + "samples_ts": [ + 56.5211, + 56.5208, + 56.5591 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T14:47:25Z", + "avg_ns": 7968254449, + "stddev_ns": 922573, + "avg_ts": 16.063744, + "stddev_ts": 0.00186, + "samples_ns": [ + 7969040874, + 7967238907, + 7968483566 + ], + "samples_ts": [ + 16.0622, + 16.0658, + 16.0633 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 814 + }, + { + "timestamp_utc": "2025-12-09T14:50:03.482469+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:47:50Z\",\n \"avg_ns\": 9064855331,\n \"stddev_ns\": 553702,\n \"avg_ts\": 56.481872,\n \"stddev_ts\": 0.003399,\n \"samples_ns\": [ 9064225503, 9065173173, 9065167318 ],\n \"samples_ts\": [ 56.4858, 56.4799, 56.4799 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:48:26Z\",\n \"avg_ns\": 32174546918,\n \"stddev_ns\": 11795932,\n \"avg_ts\": 15.913201,\n \"stddev_ts\": 0.005833,\n \"samples_ns\": [ 32168214955, 32188156735, 32167269064 ],\n \"samples_ts\": [ 15.9163, 15.9065, 15.9168 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T14:47:50Z", + "avg_ns": 9064855331, + "stddev_ns": 553702, + "avg_ts": 56.481872, + "stddev_ts": 0.003399, + "samples_ns": [ + 9064225503, + 9065173173, + 9065167318 + ], + "samples_ts": [ + 56.4858, + 56.4799, + 56.4799 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T14:48:26Z", + "avg_ns": 32174546918, + "stddev_ns": 11795932, + "avg_ts": 15.913201, + "stddev_ts": 0.005833, + "samples_ns": [ + 32168214955, + 32188156735, + 32167269064 + ], + "samples_ts": [ + 15.9163, + 15.9065, + 15.9168 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 815 + }, + { + "timestamp_utc": "2025-12-09T14:50:37.244837+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:50:04Z\",\n \"avg_ns\": 2207363133,\n \"stddev_ns\": 2554011,\n \"avg_ts\": 57.987792,\n \"stddev_ts\": 0.067058,\n \"samples_ns\": [ 2210247082, 2206455111, 2205387206 ],\n \"samples_ts\": [ 57.9121, 58.0116, 58.0397 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:50:13Z\",\n \"avg_ns\": 7920669864,\n \"stddev_ns\": 15576326,\n \"avg_ts\": 16.160291,\n \"stddev_ts\": 0.031745,\n \"samples_ns\": [ 7910252616, 7913181059, 7938575918 ],\n \"samples_ts\": [ 16.1815, 16.1755, 16.1238 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T14:50:04Z", + "avg_ns": 2207363133, + "stddev_ns": 2554011, + "avg_ts": 57.987792, + "stddev_ts": 0.067058, + "samples_ns": [ + 2210247082, + 2206455111, + 2205387206 + ], + "samples_ts": [ + 57.9121, + 58.0116, + 58.0397 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T14:50:13Z", + "avg_ns": 7920669864, + "stddev_ns": 15576326, + "avg_ts": 16.160291, + "stddev_ts": 0.031745, + "samples_ns": [ + 7910252616, + 7913181059, + 7938575918 + ], + "samples_ts": [ + 16.1815, + 16.1755, + 16.1238 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 816 + }, + { + "timestamp_utc": "2025-12-09T14:52:23.915565+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:50:38Z\",\n \"avg_ns\": 2204914212,\n \"stddev_ns\": 1387411,\n \"avg_ts\": 58.052161,\n \"stddev_ts\": 0.036517,\n \"samples_ns\": [ 2203861639, 2204394564, 2206486433 ],\n \"samples_ts\": [ 58.0799, 58.0658, 58.0108 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:50:47Z\",\n \"avg_ns\": 32211253157,\n \"stddev_ns\": 16173918,\n \"avg_ts\": 15.895069,\n \"stddev_ts\": 0.007983,\n \"samples_ns\": [ 32218859594, 32222220519, 32192679359 ],\n \"samples_ts\": [ 15.8913, 15.8897, 15.9042 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T14:50:38Z", + "avg_ns": 2204914212, + "stddev_ns": 1387411, + "avg_ts": 58.052161, + "stddev_ts": 0.036517, + "samples_ns": [ + 2203861639, + 2204394564, + 2206486433 + ], + "samples_ts": [ + 58.0799, + 58.0658, + 58.0108 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T14:50:47Z", + "avg_ns": 32211253157, + "stddev_ns": 16173918, + "avg_ts": 15.895069, + "stddev_ts": 0.007983, + "samples_ns": [ + 32218859594, + 32222220519, + 32192679359 + ], + "samples_ts": [ + 15.8913, + 15.8897, + 15.9042 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 817 + }, + { + "timestamp_utc": "2025-12-09T14:53:24.375759+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:52:24Z\",\n \"avg_ns\": 8862923950,\n \"stddev_ns\": 2755253,\n \"avg_ts\": 57.768749,\n \"stddev_ts\": 0.017947,\n \"samples_ns\": [ 8865930021, 8862318253, 8860523577 ],\n \"samples_ts\": [ 57.7492, 57.7727, 57.7844 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:53:00Z\",\n \"avg_ns\": 7942695913,\n \"stddev_ns\": 729509,\n \"avg_ts\": 16.115435,\n \"stddev_ts\": 0.001469,\n \"samples_ns\": [ 7941880319, 7943262913, 7942944508 ],\n \"samples_ts\": [ 16.1171, 16.1143, 16.1149 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T14:52:24Z", + "avg_ns": 8862923950, + "stddev_ns": 2755253, + "avg_ts": 57.768749, + "stddev_ts": 0.017947, + "samples_ns": [ + 8865930021, + 8862318253, + 8860523577 + ], + "samples_ts": [ + 57.7492, + 57.7727, + 57.7844 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T14:53:00Z", + "avg_ns": 7942695913, + "stddev_ns": 729509, + "avg_ts": 16.115435, + "stddev_ts": 0.001469, + "samples_ns": [ + 7941880319, + 7943262913, + 7942944508 + ], + "samples_ts": [ + 16.1171, + 16.1143, + 16.1149 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 818 + }, + { + "timestamp_utc": "2025-12-09T14:55:37.419629+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:53:25Z\",\n \"avg_ns\": 8868358545,\n \"stddev_ns\": 1984618,\n \"avg_ts\": 57.733347,\n \"stddev_ts\": 0.012904,\n \"samples_ns\": [ 8866710234, 8870558253, 8867807149 ],\n \"samples_ts\": [ 57.7441, 57.719, 57.7369 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:54:00Z\",\n \"avg_ns\": 32132375647,\n \"stddev_ns\": 9259148,\n \"avg_ts\": 15.934086,\n \"stddev_ts\": 0.004590,\n \"samples_ns\": [ 32131619769, 32123519347, 32141987826 ],\n \"samples_ts\": [ 15.9345, 15.9385, 15.9293 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T14:53:25Z", + "avg_ns": 8868358545, + "stddev_ns": 1984618, + "avg_ts": 57.733347, + "stddev_ts": 0.012904, + "samples_ns": [ + 8866710234, + 8870558253, + 8867807149 + ], + "samples_ts": [ + 57.7441, + 57.719, + 57.7369 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T14:54:00Z", + "avg_ns": 32132375647, + "stddev_ns": 9259148, + "avg_ts": 15.934086, + "stddev_ts": 0.00459, + "samples_ns": [ + 32131619769, + 32123519347, + 32141987826 + ], + "samples_ts": [ + 15.9345, + 15.9385, + 15.9293 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 819 + }, + { + "timestamp_utc": "2025-12-09T14:56:11.262173+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:55:38Z\",\n \"avg_ns\": 2199540306,\n \"stddev_ns\": 6447929,\n \"avg_ts\": 58.194311,\n \"stddev_ts\": 0.170339,\n \"samples_ns\": [ 2206867753, 2197017985, 2194735182 ],\n \"samples_ts\": [ 58.0008, 58.2608, 58.3214 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:55:47Z\",\n \"avg_ns\": 7954063991,\n \"stddev_ns\": 16957834,\n \"avg_ts\": 16.092451,\n \"stddev_ts\": 0.034274,\n \"samples_ns\": [ 7973245693, 7947881201, 7941065079 ],\n \"samples_ts\": [ 16.0537, 16.1049, 16.1187 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T14:55:38Z", + "avg_ns": 2199540306, + "stddev_ns": 6447929, + "avg_ts": 58.194311, + "stddev_ts": 0.170339, + "samples_ns": [ + 2206867753, + 2197017985, + 2194735182 + ], + "samples_ts": [ + 58.0008, + 58.2608, + 58.3214 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T14:55:47Z", + "avg_ns": 7954063991, + "stddev_ns": 16957834, + "avg_ts": 16.092451, + "stddev_ts": 0.034274, + "samples_ns": [ + 7973245693, + 7947881201, + 7941065079 + ], + "samples_ts": [ + 16.0537, + 16.1049, + 16.1187 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 820 + }, + { + "timestamp_utc": "2025-12-09T14:57:57.640426+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:56:12Z\",\n \"avg_ns\": 2197882562,\n \"stddev_ns\": 9548257,\n \"avg_ts\": 58.238601,\n \"stddev_ts\": 0.252370,\n \"samples_ns\": [ 2208906767, 2192501657, 2192239263 ],\n \"samples_ts\": [ 57.9472, 58.3808, 58.3878 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:56:21Z\",\n \"avg_ns\": 32131257628,\n \"stddev_ns\": 19765653,\n \"avg_ts\": 15.934643,\n \"stddev_ts\": 0.009799,\n \"samples_ns\": [ 32114217167, 32152924487, 32126631232 ],\n \"samples_ts\": [ 15.9431, 15.9239, 15.9369 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T14:56:12Z", + "avg_ns": 2197882562, + "stddev_ns": 9548257, + "avg_ts": 58.238601, + "stddev_ts": 0.25237, + "samples_ns": [ + 2208906767, + 2192501657, + 2192239263 + ], + "samples_ts": [ + 57.9472, + 58.3808, + 58.3878 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T14:56:21Z", + "avg_ns": 32131257628, + "stddev_ns": 19765653, + "avg_ts": 15.934643, + "stddev_ts": 0.009799, + "samples_ns": [ + 32114217167, + 32152924487, + 32126631232 + ], + "samples_ts": [ + 15.9431, + 15.9239, + 15.9369 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 821 + }, + { + "timestamp_utc": "2025-12-09T14:58:58.107235+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:57:58Z\",\n \"avg_ns\": 8850827120,\n \"stddev_ns\": 37769064,\n \"avg_ts\": 57.848405,\n \"stddev_ts\": 0.247455,\n \"samples_ns\": [ 8807292304, 8870351042, 8874838015 ],\n \"samples_ts\": [ 58.1336, 57.7204, 57.6912 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:58:34Z\",\n \"avg_ns\": 7964399316,\n \"stddev_ns\": 2664830,\n \"avg_ts\": 16.071521,\n \"stddev_ts\": 0.005376,\n \"samples_ns\": [ 7962864753, 7962856801, 7967476394 ],\n \"samples_ts\": [ 16.0746, 16.0746, 16.0653 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T14:57:58Z", + "avg_ns": 8850827120, + "stddev_ns": 37769064, + "avg_ts": 57.848405, + "stddev_ts": 0.247455, + "samples_ns": [ + 8807292304, + 8870351042, + 8874838015 + ], + "samples_ts": [ + 58.1336, + 57.7204, + 57.6912 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T14:58:34Z", + "avg_ns": 7964399316, + "stddev_ns": 2664830, + "avg_ts": 16.071521, + "stddev_ts": 0.005376, + "samples_ns": [ + 7962864753, + 7962856801, + 7967476394 + ], + "samples_ts": [ + 16.0746, + 16.0746, + 16.0653 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 822 + }, + { + "timestamp_utc": "2025-12-09T15:01:11.193696+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:58:59Z\",\n \"avg_ns\": 8870273163,\n \"stddev_ns\": 1939515,\n \"avg_ts\": 57.720885,\n \"stddev_ts\": 0.012619,\n \"samples_ns\": [ 8872491228, 8869432203, 8868896058 ],\n \"samples_ts\": [ 57.7065, 57.7264, 57.7298 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T14:59:34Z\",\n \"avg_ns\": 32140400146,\n \"stddev_ns\": 19713216,\n \"avg_ts\": 15.930111,\n \"stddev_ts\": 0.009767,\n \"samples_ns\": [ 32162838961, 32132490339, 32125871139 ],\n \"samples_ts\": [ 15.919, 15.934, 15.9373 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T14:58:59Z", + "avg_ns": 8870273163, + "stddev_ns": 1939515, + "avg_ts": 57.720885, + "stddev_ts": 0.012619, + "samples_ns": [ + 8872491228, + 8869432203, + 8868896058 + ], + "samples_ts": [ + 57.7065, + 57.7264, + 57.7298 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T14:59:34Z", + "avg_ns": 32140400146, + "stddev_ns": 19713216, + "avg_ts": 15.930111, + "stddev_ts": 0.009767, + "samples_ns": [ + 32162838961, + 32132490339, + 32125871139 + ], + "samples_ts": [ + 15.919, + 15.934, + 15.9373 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 823 + }, + { + "timestamp_utc": "2025-12-09T15:01:45.039678+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:01:12Z\",\n \"avg_ns\": 2206749735,\n \"stddev_ns\": 663002,\n \"avg_ts\": 58.003862,\n \"stddev_ts\": 0.017383,\n \"samples_ns\": [ 2207433478, 2206702377, 2206113351 ],\n \"samples_ts\": [ 57.9859, 58.0051, 58.0206 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:01:21Z\",\n \"avg_ns\": 7955544734,\n \"stddev_ns\": 1862512,\n \"avg_ts\": 16.089408,\n \"stddev_ts\": 0.003758,\n \"samples_ns\": [ 7957059466, 7953471226, 7956103512 ],\n \"samples_ts\": [ 16.0863, 16.0936, 16.0883 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T15:01:12Z", + "avg_ns": 2206749735, + "stddev_ns": 663002, + "avg_ts": 58.003862, + "stddev_ts": 0.017383, + "samples_ns": [ + 2207433478, + 2206702377, + 2206113351 + ], + "samples_ts": [ + 57.9859, + 58.0051, + 58.0206 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T15:01:21Z", + "avg_ns": 7955544734, + "stddev_ns": 1862512, + "avg_ts": 16.089408, + "stddev_ts": 0.003758, + "samples_ns": [ + 7957059466, + 7953471226, + 7956103512 + ], + "samples_ts": [ + 16.0863, + 16.0936, + 16.0883 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 824 + }, + { + "timestamp_utc": "2025-12-09T15:03:30.964879+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:01:46Z\",\n \"avg_ns\": 2199111805,\n \"stddev_ns\": 6883139,\n \"avg_ts\": 58.205697,\n \"stddev_ts\": 0.181977,\n \"samples_ns\": [ 2193285373, 2197343448, 2206706594 ],\n \"samples_ts\": [ 58.3599, 58.2522, 58.005 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:01:54Z\",\n \"avg_ns\": 31983552584,\n \"stddev_ns\": 19976009,\n \"avg_ts\": 16.008232,\n \"stddev_ts\": 0.009994,\n \"samples_ns\": [ 32006434822, 31969605597, 31974617335 ],\n \"samples_ts\": [ 15.9968, 16.0152, 16.0127 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T15:01:46Z", + "avg_ns": 2199111805, + "stddev_ns": 6883139, + "avg_ts": 58.205697, + "stddev_ts": 0.181977, + "samples_ns": [ + 2193285373, + 2197343448, + 2206706594 + ], + "samples_ts": [ + 58.3599, + 58.2522, + 58.005 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T15:01:54Z", + "avg_ns": 31983552584, + "stddev_ns": 19976009, + "avg_ts": 16.008232, + "stddev_ts": 0.009994, + "samples_ns": [ + 32006434822, + 31969605597, + 31974617335 + ], + "samples_ts": [ + 15.9968, + 16.0152, + 16.0127 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 825 + }, + { + "timestamp_utc": "2025-12-09T15:04:32.163321+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:03:32Z\",\n \"avg_ns\": 9052716977,\n \"stddev_ns\": 18769676,\n \"avg_ts\": 56.557768,\n \"stddev_ts\": 0.117400,\n \"samples_ns\": [ 9031093032, 9064788472, 9062269429 ],\n \"samples_ts\": [ 56.693, 56.4823, 56.498 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:04:08Z\",\n \"avg_ns\": 7940283145,\n \"stddev_ns\": 6764130,\n \"avg_ts\": 16.120340,\n \"stddev_ts\": 0.013729,\n \"samples_ns\": [ 7947205418, 7933691643, 7939952376 ],\n \"samples_ts\": [ 16.1063, 16.1337, 16.121 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T15:03:32Z", + "avg_ns": 9052716977, + "stddev_ns": 18769676, + "avg_ts": 56.557768, + "stddev_ts": 0.1174, + "samples_ns": [ + 9031093032, + 9064788472, + 9062269429 + ], + "samples_ts": [ + 56.693, + 56.4823, + 56.498 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T15:04:08Z", + "avg_ns": 7940283145, + "stddev_ns": 6764130, + "avg_ts": 16.12034, + "stddev_ts": 0.013729, + "samples_ns": [ + 7947205418, + 7933691643, + 7939952376 + ], + "samples_ts": [ + 16.1063, + 16.1337, + 16.121 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 826 + }, + { + "timestamp_utc": "2025-12-09T15:06:45.955742+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:04:33Z\",\n \"avg_ns\": 9063638831,\n \"stddev_ns\": 4964418,\n \"avg_ts\": 56.489464,\n \"stddev_ts\": 0.030943,\n \"samples_ns\": [ 9065462267, 9058021511, 9067432716 ],\n \"samples_ts\": [ 56.4781, 56.5245, 56.4658 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:05:09Z\",\n \"avg_ns\": 32119277741,\n \"stddev_ns\": 14906679,\n \"avg_ts\": 15.940585,\n \"stddev_ts\": 0.007400,\n \"samples_ns\": [ 32102160504, 32126268164, 32129404555 ],\n \"samples_ts\": [ 15.9491, 15.9371, 15.9356 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T15:04:33Z", + "avg_ns": 9063638831, + "stddev_ns": 4964418, + "avg_ts": 56.489464, + "stddev_ts": 0.030943, + "samples_ns": [ + 9065462267, + 9058021511, + 9067432716 + ], + "samples_ts": [ + 56.4781, + 56.5245, + 56.4658 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T15:05:09Z", + "avg_ns": 32119277741, + "stddev_ns": 14906679, + "avg_ts": 15.940585, + "stddev_ts": 0.0074, + "samples_ns": [ + 32102160504, + 32126268164, + 32129404555 + ], + "samples_ts": [ + 15.9491, + 15.9371, + 15.9356 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 827 + }, + { + "timestamp_utc": "2025-12-09T15:07:19.352720+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:06:46Z\",\n \"avg_ns\": 1708545437,\n \"stddev_ns\": 6486439,\n \"avg_ts\": 74.918249,\n \"stddev_ts\": 0.285038,\n \"samples_ns\": [ 1701055914, 1712317598, 1712262801 ],\n \"samples_ts\": [ 75.2474, 74.7525, 74.7549 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:06:53Z\",\n \"avg_ns\": 8459935858,\n \"stddev_ns\": 35477673,\n \"avg_ts\": 15.130315,\n \"stddev_ts\": 0.063337,\n \"samples_ns\": [ 8448711137, 8499667900, 8431428539 ],\n \"samples_ts\": [ 15.1502, 15.0594, 15.1813 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T15:06:46Z", + "avg_ns": 1708545437, + "stddev_ns": 6486439, + "avg_ts": 74.918249, + "stddev_ts": 0.285038, + "samples_ns": [ + 1701055914, + 1712317598, + 1712262801 + ], + "samples_ts": [ + 75.2474, + 74.7525, + 74.7549 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T15:06:53Z", + "avg_ns": 8459935858, + "stddev_ns": 35477673, + "avg_ts": 15.130315, + "stddev_ts": 0.063337, + "samples_ns": [ + 8448711137, + 8499667900, + 8431428539 + ], + "samples_ts": [ + 15.1502, + 15.0594, + 15.1813 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 828 + }, + { + "timestamp_utc": "2025-12-09T15:09:11.384357+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:07:20Z\",\n \"avg_ns\": 1709823490,\n \"stddev_ns\": 7346461,\n \"avg_ts\": 74.862450,\n \"stddev_ts\": 0.321784,\n \"samples_ns\": [ 1702256510, 1710286680, 1716927281 ],\n \"samples_ts\": [ 75.1943, 74.8413, 74.5518 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:07:27Z\",\n \"avg_ns\": 34670139095,\n \"stddev_ns\": 765846666,\n \"avg_ts\": 14.772518,\n \"stddev_ts\": 0.323689,\n \"samples_ns\": [ 34028298644, 34464227522, 35517891119 ],\n \"samples_ts\": [ 15.0463, 14.856, 14.4153 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T15:07:20Z", + "avg_ns": 1709823490, + "stddev_ns": 7346461, + "avg_ts": 74.86245, + "stddev_ts": 0.321784, + "samples_ns": [ + 1702256510, + 1710286680, + 1716927281 + ], + "samples_ts": [ + 75.1943, + 74.8413, + 74.5518 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T15:07:27Z", + "avg_ns": 34670139095, + "stddev_ns": 765846666, + "avg_ts": 14.772518, + "stddev_ts": 0.323689, + "samples_ns": [ + 34028298644, + 34464227522, + 35517891119 + ], + "samples_ts": [ + 15.0463, + 14.856, + 14.4153 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 829 + }, + { + "timestamp_utc": "2025-12-09T15:10:05.915878+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:09:12Z\",\n \"avg_ns\": 6824029725,\n \"stddev_ns\": 5163992,\n \"avg_ts\": 75.029010,\n \"stddev_ts\": 0.056753,\n \"samples_ns\": [ 6820819378, 6829986573, 6821283224 ],\n \"samples_ts\": [ 75.0643, 74.9635, 75.0592 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:09:39Z\",\n \"avg_ns\": 8674154510,\n \"stddev_ns\": 93651322,\n \"avg_ts\": 14.757633,\n \"stddev_ts\": 0.159999,\n \"samples_ns\": [ 8570280811, 8700048246, 8752134475 ],\n \"samples_ts\": [ 14.9353, 14.7126, 14.625 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T15:09:12Z", + "avg_ns": 6824029725, + "stddev_ns": 5163992, + "avg_ts": 75.02901, + "stddev_ts": 0.056753, + "samples_ns": [ + 6820819378, + 6829986573, + 6821283224 + ], + "samples_ts": [ + 75.0643, + 74.9635, + 75.0592 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T15:09:39Z", + "avg_ns": 8674154510, + "stddev_ns": 93651322, + "avg_ts": 14.757633, + "stddev_ts": 0.159999, + "samples_ns": [ + 8570280811, + 8700048246, + 8752134475 + ], + "samples_ts": [ + 14.9353, + 14.7126, + 14.625 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 830 + }, + { + "timestamp_utc": "2025-12-09T15:12:20.822813+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:10:06Z\",\n \"avg_ns\": 6819554953,\n \"stddev_ns\": 3277187,\n \"avg_ts\": 75.078225,\n \"stddev_ts\": 0.036087,\n \"samples_ns\": [ 6822182441, 6815882780, 6820599638 ],\n \"samples_ts\": [ 75.0493, 75.1187, 75.0667 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:10:34Z\",\n \"avg_ns\": 35462486002,\n \"stddev_ns\": 98461181,\n \"avg_ts\": 14.437866,\n \"stddev_ts\": 0.040135,\n \"samples_ns\": [ 35352101127, 35541257852, 35494099027 ],\n \"samples_ts\": [ 14.4829, 14.4058, 14.4249 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T15:10:06Z", + "avg_ns": 6819554953, + "stddev_ns": 3277187, + "avg_ts": 75.078225, + "stddev_ts": 0.036087, + "samples_ns": [ + 6822182441, + 6815882780, + 6820599638 + ], + "samples_ts": [ + 75.0493, + 75.1187, + 75.0667 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T15:10:34Z", + "avg_ns": 35462486002, + "stddev_ns": 98461181, + "avg_ts": 14.437866, + "stddev_ts": 0.040135, + "samples_ns": [ + 35352101127, + 35541257852, + 35494099027 + ], + "samples_ts": [ + 14.4829, + 14.4058, + 14.4249 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 831 + }, + { + "timestamp_utc": "2025-12-09T15:12:54.912093+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:12:21Z\",\n \"avg_ns\": 1707706089,\n \"stddev_ns\": 2840042,\n \"avg_ts\": 74.954488,\n \"stddev_ts\": 0.124540,\n \"samples_ns\": [ 1705819488, 1710972404, 1706326375 ],\n \"samples_ts\": [ 75.0372, 74.8113, 75.015 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:12:28Z\",\n \"avg_ns\": 8680703412,\n \"stddev_ns\": 17412042,\n \"avg_ts\": 14.745388,\n \"stddev_ts\": 0.029607,\n \"samples_ns\": [ 8660818177, 8688076280, 8693215780 ],\n \"samples_ts\": [ 14.7792, 14.7328, 14.7241 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T15:12:21Z", + "avg_ns": 1707706089, + "stddev_ns": 2840042, + "avg_ts": 74.954488, + "stddev_ts": 0.12454, + "samples_ns": [ + 1705819488, + 1710972404, + 1706326375 + ], + "samples_ts": [ + 75.0372, + 74.8113, + 75.015 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T15:12:28Z", + "avg_ns": 8680703412, + "stddev_ns": 17412042, + "avg_ts": 14.745388, + "stddev_ts": 0.029607, + "samples_ns": [ + 8660818177, + 8688076280, + 8693215780 + ], + "samples_ts": [ + 14.7792, + 14.7328, + 14.7241 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 832 + }, + { + "timestamp_utc": "2025-12-09T15:14:49.578137+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:12:55Z\",\n \"avg_ns\": 1700886500,\n \"stddev_ns\": 1407728,\n \"avg_ts\": 75.254909,\n \"stddev_ts\": 0.062257,\n \"samples_ns\": [ 1700273336, 1699889348, 1702496816 ],\n \"samples_ts\": [ 75.282, 75.299, 75.1837 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:13:02Z\",\n \"avg_ns\": 35553677618,\n \"stddev_ns\": 32417514,\n \"avg_ts\": 14.400769,\n \"stddev_ts\": 0.013125,\n \"samples_ns\": [ 35527765926, 35590027862, 35543239067 ],\n \"samples_ts\": [ 14.4113, 14.3861, 14.405 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T15:12:55Z", + "avg_ns": 1700886500, + "stddev_ns": 1407728, + "avg_ts": 75.254909, + "stddev_ts": 0.062257, + "samples_ns": [ + 1700273336, + 1699889348, + 1702496816 + ], + "samples_ts": [ + 75.282, + 75.299, + 75.1837 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T15:13:02Z", + "avg_ns": 35553677618, + "stddev_ns": 32417514, + "avg_ts": 14.400769, + "stddev_ts": 0.013125, + "samples_ns": [ + 35527765926, + 35590027862, + 35543239067 + ], + "samples_ts": [ + 14.4113, + 14.3861, + 14.405 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 833 + }, + { + "timestamp_utc": "2025-12-09T15:15:44.536319+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:14:50Z\",\n \"avg_ns\": 6889686643,\n \"stddev_ns\": 8778598,\n \"avg_ts\": 74.314055,\n \"stddev_ts\": 0.094716,\n \"samples_ns\": [ 6891503592, 6897413795, 6880142544 ],\n \"samples_ts\": [ 74.2944, 74.2307, 74.4171 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:15:18Z\",\n \"avg_ns\": 8745137693,\n \"stddev_ns\": 39373071,\n \"avg_ts\": 14.636903,\n \"stddev_ts\": 0.066071,\n \"samples_ns\": [ 8699692781, 8766716140, 8769004158 ],\n \"samples_ts\": [ 14.7132, 14.6007, 14.5969 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T15:14:50Z", + "avg_ns": 6889686643, + "stddev_ns": 8778598, + "avg_ts": 74.314055, + "stddev_ts": 0.094716, + "samples_ns": [ + 6891503592, + 6897413795, + 6880142544 + ], + "samples_ts": [ + 74.2944, + 74.2307, + 74.4171 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T15:15:18Z", + "avg_ns": 8745137693, + "stddev_ns": 39373071, + "avg_ts": 14.636903, + "stddev_ts": 0.066071, + "samples_ns": [ + 8699692781, + 8766716140, + 8769004158 + ], + "samples_ts": [ + 14.7132, + 14.6007, + 14.5969 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 834 + }, + { + "timestamp_utc": "2025-12-09T15:17:59.733027+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:15:45Z\",\n \"avg_ns\": 6889053862,\n \"stddev_ns\": 12100108,\n \"avg_ts\": 74.320954,\n \"stddev_ts\": 0.130409,\n \"samples_ns\": [ 6880944630, 6883255256, 6902961701 ],\n \"samples_ts\": [ 74.4084, 74.3834, 74.1711 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:16:13Z\",\n \"avg_ns\": 35467942207,\n \"stddev_ns\": 55625576,\n \"avg_ts\": 14.435595,\n \"stddev_ts\": 0.022660,\n \"samples_ns\": [ 35497030521, 35403804338, 35502991764 ],\n \"samples_ts\": [ 14.4237, 14.4617, 14.4213 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T15:15:45Z", + "avg_ns": 6889053862, + "stddev_ns": 12100108, + "avg_ts": 74.320954, + "stddev_ts": 0.130409, + "samples_ns": [ + 6880944630, + 6883255256, + 6902961701 + ], + "samples_ts": [ + 74.4084, + 74.3834, + 74.1711 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T15:16:13Z", + "avg_ns": 35467942207, + "stddev_ns": 55625576, + "avg_ts": 14.435595, + "stddev_ts": 0.02266, + "samples_ns": [ + 35497030521, + 35403804338, + 35502991764 + ], + "samples_ts": [ + 14.4237, + 14.4617, + 14.4213 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 835 + }, + { + "timestamp_utc": "2025-12-09T15:18:34.021672+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:18:00Z\",\n \"avg_ns\": 1702347713,\n \"stddev_ns\": 2134636,\n \"avg_ts\": 75.190358,\n \"stddev_ts\": 0.094317,\n \"samples_ns\": [ 1703549060, 1703610060, 1699884021 ],\n \"samples_ts\": [ 75.1373, 75.1346, 75.2993 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:18:07Z\",\n \"avg_ns\": 8753398209,\n \"stddev_ns\": 17578950,\n \"avg_ts\": 14.622932,\n \"stddev_ts\": 0.029344,\n \"samples_ns\": [ 8738509376, 8772790177, 8748895076 ],\n \"samples_ts\": [ 14.6478, 14.5906, 14.6304 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T15:18:00Z", + "avg_ns": 1702347713, + "stddev_ns": 2134636, + "avg_ts": 75.190358, + "stddev_ts": 0.094317, + "samples_ns": [ + 1703549060, + 1703610060, + 1699884021 + ], + "samples_ts": [ + 75.1373, + 75.1346, + 75.2993 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T15:18:07Z", + "avg_ns": 8753398209, + "stddev_ns": 17578950, + "avg_ts": 14.622932, + "stddev_ts": 0.029344, + "samples_ns": [ + 8738509376, + 8772790177, + 8748895076 + ], + "samples_ts": [ + 14.6478, + 14.5906, + 14.6304 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 836 + }, + { + "timestamp_utc": "2025-12-09T15:20:28.527717+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:18:35Z\",\n \"avg_ns\": 1696548383,\n \"stddev_ns\": 2455689,\n \"avg_ts\": 75.447408,\n \"stddev_ts\": 0.109103,\n \"samples_ns\": [ 1694957976, 1699376238, 1695310936 ],\n \"samples_ts\": [ 75.5181, 75.3218, 75.5024 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:18:41Z\",\n \"avg_ns\": 35489390189,\n \"stddev_ns\": 19287806,\n \"avg_ts\": 14.426850,\n \"stddev_ts\": 0.007842,\n \"samples_ns\": [ 35468055513, 35494528318, 35505586738 ],\n \"samples_ts\": [ 14.4355, 14.4248, 14.4203 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T15:18:35Z", + "avg_ns": 1696548383, + "stddev_ns": 2455689, + "avg_ts": 75.447408, + "stddev_ts": 0.109103, + "samples_ns": [ + 1694957976, + 1699376238, + 1695310936 + ], + "samples_ts": [ + 75.5181, + 75.3218, + 75.5024 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T15:18:41Z", + "avg_ns": 35489390189, + "stddev_ns": 19287806, + "avg_ts": 14.42685, + "stddev_ts": 0.007842, + "samples_ns": [ + 35468055513, + 35494528318, + 35505586738 + ], + "samples_ts": [ + 14.4355, + 14.4248, + 14.4203 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 837 + }, + { + "timestamp_utc": "2025-12-09T15:21:24.299040+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:20:29Z\",\n \"avg_ns\": 7054892944,\n \"stddev_ns\": 4546066,\n \"avg_ts\": 72.573765,\n \"stddev_ts\": 0.046758,\n \"samples_ns\": [ 7059788416, 7054086088, 7050804328 ],\n \"samples_ts\": [ 72.5234, 72.582, 72.6158 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:20:57Z\",\n \"avg_ns\": 8780035296,\n \"stddev_ns\": 12031731,\n \"avg_ts\": 14.578547,\n \"stddev_ts\": 0.019978,\n \"samples_ns\": [ 8767933879, 8791996109, 8780175900 ],\n \"samples_ts\": [ 14.5987, 14.5587, 14.5783 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T15:20:29Z", + "avg_ns": 7054892944, + "stddev_ns": 4546066, + "avg_ts": 72.573765, + "stddev_ts": 0.046758, + "samples_ns": [ + 7059788416, + 7054086088, + 7050804328 + ], + "samples_ts": [ + 72.5234, + 72.582, + 72.6158 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T15:20:57Z", + "avg_ns": 8780035296, + "stddev_ns": 12031731, + "avg_ts": 14.578547, + "stddev_ts": 0.019978, + "samples_ns": [ + 8767933879, + 8791996109, + 8780175900 + ], + "samples_ts": [ + 14.5987, + 14.5587, + 14.5783 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 838 + }, + { + "timestamp_utc": "2025-12-09T15:23:40.430583+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:21:25Z\",\n \"avg_ns\": 7058185451,\n \"stddev_ns\": 5499947,\n \"avg_ts\": 72.539920,\n \"stddev_ts\": 0.056536,\n \"samples_ns\": [ 7051869017, 7060784833, 7061902505 ],\n \"samples_ts\": [ 72.6049, 72.5132, 72.5017 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:21:53Z\",\n \"avg_ns\": 35569491681,\n \"stddev_ns\": 66244954,\n \"avg_ts\": 14.394391,\n \"stddev_ts\": 0.026807,\n \"samples_ns\": [ 35568527688, 35503734522, 35636212835 ],\n \"samples_ts\": [ 14.3947, 14.421, 14.3674 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T15:21:25Z", + "avg_ns": 7058185451, + "stddev_ns": 5499947, + "avg_ts": 72.53992, + "stddev_ts": 0.056536, + "samples_ns": [ + 7051869017, + 7060784833, + 7061902505 + ], + "samples_ts": [ + 72.6049, + 72.5132, + 72.5017 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T15:21:53Z", + "avg_ns": 35569491681, + "stddev_ns": 66244954, + "avg_ts": 14.394391, + "stddev_ts": 0.026807, + "samples_ns": [ + 35568527688, + 35503734522, + 35636212835 + ], + "samples_ts": [ + 14.3947, + 14.421, + 14.3674 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 839 + }, + { + "timestamp_utc": "2025-12-09T15:24:14.814583+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:23:41Z\",\n \"avg_ns\": 1702231426,\n \"stddev_ns\": 1594410,\n \"avg_ts\": 75.195460,\n \"stddev_ts\": 0.070395,\n \"samples_ns\": [ 1701245089, 1701378303, 1704070886 ],\n \"samples_ts\": [ 75.239, 75.2331, 75.1142 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:23:48Z\",\n \"avg_ns\": 8793506498,\n \"stddev_ns\": 66579516,\n \"avg_ts\": 14.556754,\n \"stddev_ts\": 0.110629,\n \"samples_ns\": [ 8717915660, 8843440375, 8819163460 ],\n \"samples_ts\": [ 14.6824, 14.474, 14.5138 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T15:23:41Z", + "avg_ns": 1702231426, + "stddev_ns": 1594410, + "avg_ts": 75.19546, + "stddev_ts": 0.070395, + "samples_ns": [ + 1701245089, + 1701378303, + 1704070886 + ], + "samples_ts": [ + 75.239, + 75.2331, + 75.1142 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T15:23:48Z", + "avg_ns": 8793506498, + "stddev_ns": 66579516, + "avg_ts": 14.556754, + "stddev_ts": 0.110629, + "samples_ns": [ + 8717915660, + 8843440375, + 8819163460 + ], + "samples_ts": [ + 14.6824, + 14.474, + 14.5138 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 840 + }, + { + "timestamp_utc": "2025-12-09T15:26:09.173539+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:24:15Z\",\n \"avg_ns\": 1701070518,\n \"stddev_ns\": 4956914,\n \"avg_ts\": 75.247160,\n \"stddev_ts\": 0.219360,\n \"samples_ns\": [ 1701536827, 1695896926, 1705777801 ],\n \"samples_ts\": [ 75.2261, 75.4763, 75.0391 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:24:22Z\",\n \"avg_ns\": 35452441137,\n \"stddev_ns\": 42535663,\n \"avg_ts\": 14.441897,\n \"stddev_ts\": 0.017328,\n \"samples_ns\": [ 35452722467, 35494835438, 35409765506 ],\n \"samples_ts\": [ 14.4418, 14.4246, 14.4593 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T15:24:15Z", + "avg_ns": 1701070518, + "stddev_ns": 4956914, + "avg_ts": 75.24716, + "stddev_ts": 0.21936, + "samples_ns": [ + 1701536827, + 1695896926, + 1705777801 + ], + "samples_ts": [ + 75.2261, + 75.4763, + 75.0391 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T15:24:22Z", + "avg_ns": 35452441137, + "stddev_ns": 42535663, + "avg_ts": 14.441897, + "stddev_ts": 0.017328, + "samples_ns": [ + 35452722467, + 35494835438, + 35409765506 + ], + "samples_ts": [ + 14.4418, + 14.4246, + 14.4593 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 841 + }, + { + "timestamp_utc": "2025-12-09T15:27:03.843710+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:26:10Z\",\n \"avg_ns\": 6824240094,\n \"stddev_ns\": 7715357,\n \"avg_ts\": 75.026733,\n \"stddev_ts\": 0.084840,\n \"samples_ns\": [ 6816112870, 6825143341, 6831464071 ],\n \"samples_ts\": [ 75.1161, 75.0167, 74.9473 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:26:37Z\",\n \"avg_ns\": 8731180353,\n \"stddev_ns\": 40507912,\n \"avg_ts\": 14.660314,\n \"stddev_ts\": 0.068180,\n \"samples_ns\": [ 8684935677, 8760382448, 8748222934 ],\n \"samples_ts\": [ 14.7382, 14.6112, 14.6315 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T15:26:10Z", + "avg_ns": 6824240094, + "stddev_ns": 7715357, + "avg_ts": 75.026733, + "stddev_ts": 0.08484, + "samples_ns": [ + 6816112870, + 6825143341, + 6831464071 + ], + "samples_ts": [ + 75.1161, + 75.0167, + 74.9473 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T15:26:37Z", + "avg_ns": 8731180353, + "stddev_ns": 40507912, + "avg_ts": 14.660314, + "stddev_ts": 0.06818, + "samples_ns": [ + 8684935677, + 8760382448, + 8748222934 + ], + "samples_ts": [ + 14.7382, + 14.6112, + 14.6315 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 842 + }, + { + "timestamp_utc": "2025-12-09T15:29:18.525804+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:27:04Z\",\n \"avg_ns\": 6820518729,\n \"stddev_ns\": 1538241,\n \"avg_ts\": 75.067607,\n \"stddev_ts\": 0.016930,\n \"samples_ns\": [ 6820354847, 6819068990, 6822132350 ],\n \"samples_ts\": [ 75.0694, 75.0836, 75.0498 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:27:32Z\",\n \"avg_ns\": 35378485601,\n \"stddev_ns\": 175115644,\n \"avg_ts\": 14.472309,\n \"stddev_ts\": 0.071824,\n \"samples_ns\": [ 35455244479, 35502113908, 35178098417 ],\n \"samples_ts\": [ 14.4407, 14.4217, 14.5545 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T15:27:04Z", + "avg_ns": 6820518729, + "stddev_ns": 1538241, + "avg_ts": 75.067607, + "stddev_ts": 0.01693, + "samples_ns": [ + 6820354847, + 6819068990, + 6822132350 + ], + "samples_ts": [ + 75.0694, + 75.0836, + 75.0498 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T15:27:32Z", + "avg_ns": 35378485601, + "stddev_ns": 175115644, + "avg_ts": 14.472309, + "stddev_ts": 0.071824, + "samples_ns": [ + 35455244479, + 35502113908, + 35178098417 + ], + "samples_ts": [ + 14.4407, + 14.4217, + 14.5545 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 843 + }, + { + "timestamp_utc": "2025-12-09T15:29:52.956472+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:29:19Z\",\n \"avg_ns\": 1705169464,\n \"stddev_ns\": 2978872,\n \"avg_ts\": 75.066005,\n \"stddev_ts\": 0.131012,\n \"samples_ns\": [ 1703155394, 1708591312, 1703761686 ],\n \"samples_ts\": [ 75.1546, 74.9155, 75.1279 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:29:26Z\",\n \"avg_ns\": 8806006997,\n \"stddev_ns\": 40744573,\n \"avg_ts\": 14.535740,\n \"stddev_ts\": 0.067435,\n \"samples_ns\": [ 8758971130, 8830432127, 8828617736 ],\n \"samples_ts\": [ 14.6136, 14.4953, 14.4983 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T15:29:19Z", + "avg_ns": 1705169464, + "stddev_ns": 2978872, + "avg_ts": 75.066005, + "stddev_ts": 0.131012, + "samples_ns": [ + 1703155394, + 1708591312, + 1703761686 + ], + "samples_ts": [ + 75.1546, + 74.9155, + 75.1279 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T15:29:26Z", + "avg_ns": 8806006997, + "stddev_ns": 40744573, + "avg_ts": 14.53574, + "stddev_ts": 0.067435, + "samples_ns": [ + 8758971130, + 8830432127, + 8828617736 + ], + "samples_ts": [ + 14.6136, + 14.4953, + 14.4983 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 844 + }, + { + "timestamp_utc": "2025-12-09T15:31:47.725385+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:29:54Z\",\n \"avg_ns\": 1702659988,\n \"stddev_ns\": 1046883,\n \"avg_ts\": 75.176508,\n \"stddev_ts\": 0.046171,\n \"samples_ns\": [ 1702189388, 1703858701, 1701931876 ],\n \"samples_ts\": [ 75.1973, 75.1236, 75.2087 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:30:00Z\",\n \"avg_ns\": 35572279727,\n \"stddev_ns\": 88132724,\n \"avg_ts\": 14.393289,\n \"stddev_ts\": 0.035705,\n \"samples_ns\": [ 35636716506, 35471846474, 35608276202 ],\n \"samples_ts\": [ 14.3672, 14.434, 14.3787 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T15:29:54Z", + "avg_ns": 1702659988, + "stddev_ns": 1046883, + "avg_ts": 75.176508, + "stddev_ts": 0.046171, + "samples_ns": [ + 1702189388, + 1703858701, + 1701931876 + ], + "samples_ts": [ + 75.1973, + 75.1236, + 75.2087 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T15:30:00Z", + "avg_ns": 35572279727, + "stddev_ns": 88132724, + "avg_ts": 14.393289, + "stddev_ts": 0.035705, + "samples_ns": [ + 35636716506, + 35471846474, + 35608276202 + ], + "samples_ts": [ + 14.3672, + 14.434, + 14.3787 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 845 + }, + { + "timestamp_utc": "2025-12-09T15:32:42.811516+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:31:48Z\",\n \"avg_ns\": 6877801148,\n \"stddev_ns\": 11497921,\n \"avg_ts\": 74.442535,\n \"stddev_ts\": 0.124412,\n \"samples_ns\": [ 6889830713, 6876650072, 6866922661 ],\n \"samples_ts\": [ 74.3124, 74.4549, 74.5603 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:32:16Z\",\n \"avg_ns\": 8792733280,\n \"stddev_ns\": 36430622,\n \"avg_ts\": 14.557643,\n \"stddev_ts\": 0.060457,\n \"samples_ns\": [ 8750800547, 8810798635, 8816600659 ],\n \"samples_ts\": [ 14.6272, 14.5276, 14.5181 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T15:31:48Z", + "avg_ns": 6877801148, + "stddev_ns": 11497921, + "avg_ts": 74.442535, + "stddev_ts": 0.124412, + "samples_ns": [ + 6889830713, + 6876650072, + 6866922661 + ], + "samples_ts": [ + 74.3124, + 74.4549, + 74.5603 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T15:32:16Z", + "avg_ns": 8792733280, + "stddev_ns": 36430622, + "avg_ts": 14.557643, + "stddev_ts": 0.060457, + "samples_ns": [ + 8750800547, + 8810798635, + 8816600659 + ], + "samples_ts": [ + 14.6272, + 14.5276, + 14.5181 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 846 + }, + { + "timestamp_utc": "2025-12-09T15:34:57.675082+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:32:43Z\",\n \"avg_ns\": 6882091094,\n \"stddev_ns\": 5476528,\n \"avg_ts\": 74.396024,\n \"stddev_ts\": 0.059193,\n \"samples_ns\": [ 6887370500, 6876439000, 6882463784 ],\n \"samples_ts\": [ 74.339, 74.4571, 74.392 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:33:11Z\",\n \"avg_ns\": 35381200974,\n \"stddev_ns\": 22885450,\n \"avg_ts\": 14.470966,\n \"stddev_ts\": 0.009362,\n \"samples_ns\": [ 35355957645, 35400586795, 35387058484 ],\n \"samples_ts\": [ 14.4813, 14.463, 14.4686 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T15:32:43Z", + "avg_ns": 6882091094, + "stddev_ns": 5476528, + "avg_ts": 74.396024, + "stddev_ts": 0.059193, + "samples_ns": [ + 6887370500, + 6876439000, + 6882463784 + ], + "samples_ts": [ + 74.339, + 74.4571, + 74.392 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T15:33:11Z", + "avg_ns": 35381200974, + "stddev_ns": 22885450, + "avg_ts": 14.470966, + "stddev_ts": 0.009362, + "samples_ns": [ + 35355957645, + 35400586795, + 35387058484 + ], + "samples_ts": [ + 14.4813, + 14.463, + 14.4686 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 847 + }, + { + "timestamp_utc": "2025-12-09T15:35:32.048274+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:34:58Z\",\n \"avg_ns\": 1707389918,\n \"stddev_ns\": 3760017,\n \"avg_ts\": 74.968472,\n \"stddev_ts\": 0.164922,\n \"samples_ns\": [ 1711647512, 1705997598, 1704524644 ],\n \"samples_ts\": [ 74.7818, 75.0294, 75.0943 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:35:05Z\",\n \"avg_ns\": 8768839936,\n \"stddev_ns\": 55789777,\n \"avg_ts\": 14.597537,\n \"stddev_ts\": 0.093183,\n \"samples_ns\": [ 8705111993, 8792545939, 8808861877 ],\n \"samples_ts\": [ 14.704, 14.5578, 14.5308 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T15:34:58Z", + "avg_ns": 1707389918, + "stddev_ns": 3760017, + "avg_ts": 74.968472, + "stddev_ts": 0.164922, + "samples_ns": [ + 1711647512, + 1705997598, + 1704524644 + ], + "samples_ts": [ + 74.7818, + 75.0294, + 75.0943 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T15:35:05Z", + "avg_ns": 8768839936, + "stddev_ns": 55789777, + "avg_ts": 14.597537, + "stddev_ts": 0.093183, + "samples_ns": [ + 8705111993, + 8792545939, + 8808861877 + ], + "samples_ts": [ + 14.704, + 14.5578, + 14.5308 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 848 + }, + { + "timestamp_utc": "2025-12-09T15:37:26.564418+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:35:33Z\",\n \"avg_ns\": 1698987138,\n \"stddev_ns\": 1610716,\n \"avg_ts\": 75.339050,\n \"stddev_ts\": 0.071433,\n \"samples_ns\": [ 1700232430, 1697168815, 1699560170 ],\n \"samples_ts\": [ 75.2838, 75.4197, 75.3136 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:35:39Z\",\n \"avg_ns\": 35505628823,\n \"stddev_ns\": 149301842,\n \"avg_ts\": 14.420419,\n \"stddev_ts\": 0.060785,\n \"samples_ns\": [ 35333368715, 35597743057, 35585774699 ],\n \"samples_ts\": [ 14.4906, 14.3829, 14.3878 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T15:35:33Z", + "avg_ns": 1698987138, + "stddev_ns": 1610716, + "avg_ts": 75.33905, + "stddev_ts": 0.071433, + "samples_ns": [ + 1700232430, + 1697168815, + 1699560170 + ], + "samples_ts": [ + 75.2838, + 75.4197, + 75.3136 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T15:35:39Z", + "avg_ns": 35505628823, + "stddev_ns": 149301842, + "avg_ts": 14.420419, + "stddev_ts": 0.060785, + "samples_ns": [ + 35333368715, + 35597743057, + 35585774699 + ], + "samples_ts": [ + 14.4906, + 14.3829, + 14.3878 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 849 + }, + { + "timestamp_utc": "2025-12-09T15:38:22.296209+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:37:27Z\",\n \"avg_ns\": 7047918344,\n \"stddev_ns\": 8541211,\n \"avg_ts\": 72.645635,\n \"stddev_ts\": 0.087994,\n \"samples_ns\": [ 7040719964, 7057355577, 7045679492 ],\n \"samples_ts\": [ 72.7198, 72.5484, 72.6686 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:37:55Z\",\n \"avg_ns\": 8775972860,\n \"stddev_ns\": 34187715,\n \"avg_ts\": 14.585425,\n \"stddev_ts\": 0.056893,\n \"samples_ns\": [ 8738465843, 8784062934, 8805389804 ],\n \"samples_ts\": [ 14.6479, 14.5718, 14.5366 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T15:37:27Z", + "avg_ns": 7047918344, + "stddev_ns": 8541211, + "avg_ts": 72.645635, + "stddev_ts": 0.087994, + "samples_ns": [ + 7040719964, + 7057355577, + 7045679492 + ], + "samples_ts": [ + 72.7198, + 72.5484, + 72.6686 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T15:37:55Z", + "avg_ns": 8775972860, + "stddev_ns": 34187715, + "avg_ts": 14.585425, + "stddev_ts": 0.056893, + "samples_ns": [ + 8738465843, + 8784062934, + 8805389804 + ], + "samples_ts": [ + 14.6479, + 14.5718, + 14.5366 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 850 + }, + { + "timestamp_utc": "2025-12-09T15:40:38.845529+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:38:23Z\",\n \"avg_ns\": 7058041566,\n \"stddev_ns\": 7430388,\n \"avg_ts\": 72.541423,\n \"stddev_ts\": 0.076396,\n \"samples_ns\": [ 7064116669, 7060250295, 7049757735 ],\n \"samples_ts\": [ 72.479, 72.5187, 72.6266 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:38:51Z\",\n \"avg_ns\": 35704006022,\n \"stddev_ns\": 66587611,\n \"avg_ts\": 14.340161,\n \"stddev_ts\": 0.026724,\n \"samples_ns\": [ 35649434492, 35778199139, 35684384437 ],\n \"samples_ts\": [ 14.3621, 14.3104, 14.348 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T15:38:23Z", + "avg_ns": 7058041566, + "stddev_ns": 7430388, + "avg_ts": 72.541423, + "stddev_ts": 0.076396, + "samples_ns": [ + 7064116669, + 7060250295, + 7049757735 + ], + "samples_ts": [ + 72.479, + 72.5187, + 72.6266 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T15:38:51Z", + "avg_ns": 35704006022, + "stddev_ns": 66587611, + "avg_ts": 14.340161, + "stddev_ts": 0.026724, + "samples_ns": [ + 35649434492, + 35778199139, + 35684384437 + ], + "samples_ts": [ + 14.3621, + 14.3104, + 14.348 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 851 + }, + { + "timestamp_utc": "2025-12-09T15:41:13.092977+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:40:39Z\",\n \"avg_ns\": 1702872913,\n \"stddev_ns\": 4874021,\n \"avg_ts\": 75.167499,\n \"stddev_ts\": 0.214786,\n \"samples_ns\": [ 1700201834, 1699918537, 1708498369 ],\n \"samples_ts\": [ 75.2852, 75.2977, 74.9196 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:40:46Z\",\n \"avg_ns\": 8740785847,\n \"stddev_ns\": 64726893,\n \"avg_ts\": 14.644529,\n \"stddev_ts\": 0.108775,\n \"samples_ns\": [ 8668598388, 8760107874, 8793651280 ],\n \"samples_ts\": [ 14.7659, 14.6117, 14.556 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T15:40:39Z", + "avg_ns": 1702872913, + "stddev_ns": 4874021, + "avg_ts": 75.167499, + "stddev_ts": 0.214786, + "samples_ns": [ + 1700201834, + 1699918537, + 1708498369 + ], + "samples_ts": [ + 75.2852, + 75.2977, + 74.9196 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T15:40:46Z", + "avg_ns": 8740785847, + "stddev_ns": 64726893, + "avg_ts": 14.644529, + "stddev_ts": 0.108775, + "samples_ns": [ + 8668598388, + 8760107874, + 8793651280 + ], + "samples_ts": [ + 14.7659, + 14.6117, + 14.556 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 852 + }, + { + "timestamp_utc": "2025-12-09T15:43:07.427211+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:41:14Z\",\n \"avg_ns\": 1709044594,\n \"stddev_ns\": 7540822,\n \"avg_ts\": 74.896617,\n \"stddev_ts\": 0.329789,\n \"samples_ns\": [ 1706413096, 1717548320, 1703172368 ],\n \"samples_ts\": [ 75.0111, 74.5248, 75.1539 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:41:20Z\",\n \"avg_ns\": 35434463861,\n \"stddev_ns\": 87299150,\n \"avg_ts\": 14.449268,\n \"stddev_ts\": 0.035553,\n \"samples_ns\": [ 35371256218, 35398062786, 35534072581 ],\n \"samples_ts\": [ 14.475, 14.4641, 14.4087 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T15:41:14Z", + "avg_ns": 1709044594, + "stddev_ns": 7540822, + "avg_ts": 74.896617, + "stddev_ts": 0.329789, + "samples_ns": [ + 1706413096, + 1717548320, + 1703172368 + ], + "samples_ts": [ + 75.0111, + 74.5248, + 75.1539 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T15:41:20Z", + "avg_ns": 35434463861, + "stddev_ns": 87299150, + "avg_ts": 14.449268, + "stddev_ts": 0.035553, + "samples_ns": [ + 35371256218, + 35398062786, + 35534072581 + ], + "samples_ts": [ + 14.475, + 14.4641, + 14.4087 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 853 + }, + { + "timestamp_utc": "2025-12-09T15:44:02.169018+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:43:08Z\",\n \"avg_ns\": 6835114036,\n \"stddev_ns\": 8360669,\n \"avg_ts\": 74.907384,\n \"stddev_ts\": 0.091583,\n \"samples_ns\": [ 6844399791, 6832758485, 6828183832 ],\n \"samples_ts\": [ 74.8057, 74.9331, 74.9833 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:43:35Z\",\n \"avg_ns\": 8732147935,\n \"stddev_ns\": 74343543,\n \"avg_ts\": 14.659188,\n \"stddev_ts\": 0.125056,\n \"samples_ns\": [ 8652607604, 8799881724, 8743954477 ],\n \"samples_ts\": [ 14.7932, 14.5457, 14.6387 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T15:43:08Z", + "avg_ns": 6835114036, + "stddev_ns": 8360669, + "avg_ts": 74.907384, + "stddev_ts": 0.091583, + "samples_ns": [ + 6844399791, + 6832758485, + 6828183832 + ], + "samples_ts": [ + 74.8057, + 74.9331, + 74.9833 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T15:43:35Z", + "avg_ns": 8732147935, + "stddev_ns": 74343543, + "avg_ts": 14.659188, + "stddev_ts": 0.125056, + "samples_ns": [ + 8652607604, + 8799881724, + 8743954477 + ], + "samples_ts": [ + 14.7932, + 14.5457, + 14.6387 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 854 + }, + { + "timestamp_utc": "2025-12-09T15:46:17.045126+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:44:03Z\",\n \"avg_ns\": 6825027489,\n \"stddev_ns\": 15118639,\n \"avg_ts\": 75.018258,\n \"stddev_ts\": 0.166047,\n \"samples_ns\": [ 6841680845, 6821235760, 6812165863 ],\n \"samples_ts\": [ 74.8354, 75.0597, 75.1596 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:44:30Z\",\n \"avg_ns\": 35457235990,\n \"stddev_ns\": 100981322,\n \"avg_ts\": 14.440008,\n \"stddev_ts\": 0.041151,\n \"samples_ns\": [ 35549749571, 35349512498, 35472445901 ],\n \"samples_ts\": [ 14.4024, 14.4839, 14.4337 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T15:44:03Z", + "avg_ns": 6825027489, + "stddev_ns": 15118639, + "avg_ts": 75.018258, + "stddev_ts": 0.166047, + "samples_ns": [ + 6841680845, + 6821235760, + 6812165863 + ], + "samples_ts": [ + 74.8354, + 75.0597, + 75.1596 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T15:44:30Z", + "avg_ns": 35457235990, + "stddev_ns": 100981322, + "avg_ts": 14.440008, + "stddev_ts": 0.041151, + "samples_ns": [ + 35549749571, + 35349512498, + 35472445901 + ], + "samples_ts": [ + 14.4024, + 14.4839, + 14.4337 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 855 + }, + { + "timestamp_utc": "2025-12-09T15:46:51.400101+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:46:18Z\",\n \"avg_ns\": 1704672303,\n \"stddev_ns\": 696740,\n \"avg_ts\": 75.087754,\n \"stddev_ts\": 0.030575,\n \"samples_ns\": [ 1705471471, 1704217608, 1704327832 ],\n \"samples_ts\": [ 75.0526, 75.1078, 75.1029 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:46:24Z\",\n \"avg_ns\": 8771146678,\n \"stddev_ns\": 83209663,\n \"avg_ts\": 14.594183,\n \"stddev_ts\": 0.139185,\n \"samples_ns\": [ 8675470201, 8826622928, 8811346907 ],\n \"samples_ts\": [ 14.7542, 14.5016, 14.5267 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T15:46:18Z", + "avg_ns": 1704672303, + "stddev_ns": 696740, + "avg_ts": 75.087754, + "stddev_ts": 0.030575, + "samples_ns": [ + 1705471471, + 1704217608, + 1704327832 + ], + "samples_ts": [ + 75.0526, + 75.1078, + 75.1029 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T15:46:24Z", + "avg_ns": 8771146678, + "stddev_ns": 83209663, + "avg_ts": 14.594183, + "stddev_ts": 0.139185, + "samples_ns": [ + 8675470201, + 8826622928, + 8811346907 + ], + "samples_ts": [ + 14.7542, + 14.5016, + 14.5267 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 856 + }, + { + "timestamp_utc": "2025-12-09T15:48:45.923320+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:46:52Z\",\n \"avg_ns\": 1705163017,\n \"stddev_ns\": 3627736,\n \"avg_ts\": 75.066363,\n \"stddev_ts\": 0.159538,\n \"samples_ns\": [ 1704016507, 1709224997, 1702247549 ],\n \"samples_ts\": [ 75.1166, 74.8877, 75.1947 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:46:59Z\",\n \"avg_ns\": 35501898062,\n \"stddev_ns\": 21399270,\n \"avg_ts\": 14.421768,\n \"stddev_ts\": 0.008696,\n \"samples_ns\": [ 35511740930, 35477348402, 35516604854 ],\n \"samples_ts\": [ 14.4178, 14.4317, 14.4158 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T15:46:52Z", + "avg_ns": 1705163017, + "stddev_ns": 3627736, + "avg_ts": 75.066363, + "stddev_ts": 0.159538, + "samples_ns": [ + 1704016507, + 1709224997, + 1702247549 + ], + "samples_ts": [ + 75.1166, + 74.8877, + 75.1947 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T15:46:59Z", + "avg_ns": 35501898062, + "stddev_ns": 21399270, + "avg_ts": 14.421768, + "stddev_ts": 0.008696, + "samples_ns": [ + 35511740930, + 35477348402, + 35516604854 + ], + "samples_ts": [ + 14.4178, + 14.4317, + 14.4158 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 857 + }, + { + "timestamp_utc": "2025-12-09T15:49:40.935288+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:48:47Z\",\n \"avg_ns\": 6873250743,\n \"stddev_ns\": 1390220,\n \"avg_ts\": 74.491683,\n \"stddev_ts\": 0.015012,\n \"samples_ns\": [ 6874842799, 6872320600, 6872588832 ],\n \"samples_ts\": [ 74.4744, 74.5018, 74.4989 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:49:14Z\",\n \"avg_ns\": 8762076401,\n \"stddev_ns\": 45032263,\n \"avg_ts\": 14.608666,\n \"stddev_ts\": 0.074867,\n \"samples_ns\": [ 8732043553, 8740331321, 8813854331 ],\n \"samples_ts\": [ 14.6587, 14.6448, 14.5226 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T15:48:47Z", + "avg_ns": 6873250743, + "stddev_ns": 1390220, + "avg_ts": 74.491683, + "stddev_ts": 0.015012, + "samples_ns": [ + 6874842799, + 6872320600, + 6872588832 + ], + "samples_ts": [ + 74.4744, + 74.5018, + 74.4989 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T15:49:14Z", + "avg_ns": 8762076401, + "stddev_ns": 45032263, + "avg_ts": 14.608666, + "stddev_ts": 0.074867, + "samples_ns": [ + 8732043553, + 8740331321, + 8813854331 + ], + "samples_ts": [ + 14.6587, + 14.6448, + 14.5226 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 858 + }, + { + "timestamp_utc": "2025-12-09T15:51:56.233799+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:49:41Z\",\n \"avg_ns\": 6871918340,\n \"stddev_ns\": 9940472,\n \"avg_ts\": 74.506228,\n \"stddev_ts\": 0.107788,\n \"samples_ns\": [ 6861588690, 6872750385, 6881415947 ],\n \"samples_ts\": [ 74.6183, 74.4971, 74.4033 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:50:09Z\",\n \"avg_ns\": 35535585451,\n \"stddev_ns\": 65797107,\n \"avg_ts\": 14.408125,\n \"stddev_ts\": 0.026685,\n \"samples_ns\": [ 35541998347, 35466817244, 35597940764 ],\n \"samples_ts\": [ 14.4055, 14.436, 14.3829 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T15:49:41Z", + "avg_ns": 6871918340, + "stddev_ns": 9940472, + "avg_ts": 74.506228, + "stddev_ts": 0.107788, + "samples_ns": [ + 6861588690, + 6872750385, + 6881415947 + ], + "samples_ts": [ + 74.6183, + 74.4971, + 74.4033 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T15:50:09Z", + "avg_ns": 35535585451, + "stddev_ns": 65797107, + "avg_ts": 14.408125, + "stddev_ts": 0.026685, + "samples_ns": [ + 35541998347, + 35466817244, + 35597940764 + ], + "samples_ts": [ + 14.4055, + 14.436, + 14.3829 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 859 + }, + { + "timestamp_utc": "2025-12-09T15:52:30.619021+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:51:57Z\",\n \"avg_ns\": 1706125991,\n \"stddev_ns\": 1307010,\n \"avg_ts\": 75.023797,\n \"stddev_ts\": 0.057499,\n \"samples_ns\": [ 1706832652, 1704617780, 1706927541 ],\n \"samples_ts\": [ 74.9927, 75.0901, 74.9885 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:52:04Z\",\n \"avg_ns\": 8789410808,\n \"stddev_ns\": 57653092,\n \"avg_ts\": 14.563396,\n \"stddev_ts\": 0.095600,\n \"samples_ns\": [ 8729686196, 8844740708, 8793805521 ],\n \"samples_ts\": [ 14.6626, 14.4719, 14.5557 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T15:51:57Z", + "avg_ns": 1706125991, + "stddev_ns": 1307010, + "avg_ts": 75.023797, + "stddev_ts": 0.057499, + "samples_ns": [ + 1706832652, + 1704617780, + 1706927541 + ], + "samples_ts": [ + 74.9927, + 75.0901, + 74.9885 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T15:52:04Z", + "avg_ns": 8789410808, + "stddev_ns": 57653092, + "avg_ts": 14.563396, + "stddev_ts": 0.0956, + "samples_ns": [ + 8729686196, + 8844740708, + 8793805521 + ], + "samples_ts": [ + 14.6626, + 14.4719, + 14.5557 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 860 + }, + { + "timestamp_utc": "2025-12-09T15:54:25.279564+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:52:31Z\",\n \"avg_ns\": 1701185554,\n \"stddev_ns\": 1662506,\n \"avg_ts\": 75.241693,\n \"stddev_ts\": 0.073444,\n \"samples_ns\": [ 1703103923, 1700205842, 1700246899 ],\n \"samples_ts\": [ 75.1569, 75.285, 75.2832 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:52:38Z\",\n \"avg_ns\": 35542196865,\n \"stddev_ns\": 160270241,\n \"avg_ts\": 14.405608,\n \"stddev_ts\": 0.065039,\n \"samples_ns\": [ 35368996635, 35572336280, 35685257680 ],\n \"samples_ts\": [ 14.476, 14.3932, 14.3477 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T15:52:31Z", + "avg_ns": 1701185554, + "stddev_ns": 1662506, + "avg_ts": 75.241693, + "stddev_ts": 0.073444, + "samples_ns": [ + 1703103923, + 1700205842, + 1700246899 + ], + "samples_ts": [ + 75.1569, + 75.285, + 75.2832 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T15:52:38Z", + "avg_ns": 35542196865, + "stddev_ns": 160270241, + "avg_ts": 14.405608, + "stddev_ts": 0.065039, + "samples_ns": [ + 35368996635, + 35572336280, + 35685257680 + ], + "samples_ts": [ + 14.476, + 14.3932, + 14.3477 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 861 + }, + { + "timestamp_utc": "2025-12-09T15:55:21.064718+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:54:26Z\",\n \"avg_ns\": 7047304187,\n \"stddev_ns\": 13987289,\n \"avg_ts\": 72.652085,\n \"stddev_ts\": 0.144063,\n \"samples_ns\": [ 7063084306, 7042393744, 7036434512 ],\n \"samples_ts\": [ 72.4896, 72.7026, 72.7641 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:54:54Z\",\n \"avg_ns\": 8800359477,\n \"stddev_ns\": 11526370,\n \"avg_ts\": 14.544877,\n \"stddev_ts\": 0.019038,\n \"samples_ns\": [ 8796279578, 8813370490, 8791428364 ],\n \"samples_ts\": [ 14.5516, 14.5234, 14.5596 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T15:54:26Z", + "avg_ns": 7047304187, + "stddev_ns": 13987289, + "avg_ts": 72.652085, + "stddev_ts": 0.144063, + "samples_ns": [ + 7063084306, + 7042393744, + 7036434512 + ], + "samples_ts": [ + 72.4896, + 72.7026, + 72.7641 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T15:54:54Z", + "avg_ns": 8800359477, + "stddev_ns": 11526370, + "avg_ts": 14.544877, + "stddev_ts": 0.019038, + "samples_ns": [ + 8796279578, + 8813370490, + 8791428364 + ], + "samples_ts": [ + 14.5516, + 14.5234, + 14.5596 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 862 + }, + { + "timestamp_utc": "2025-12-09T15:57:36.934522+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:55:22Z\",\n \"avg_ns\": 7045823132,\n \"stddev_ns\": 9642604,\n \"avg_ts\": 72.667257,\n \"stddev_ts\": 0.099385,\n \"samples_ns\": [ 7056725080, 7038412445, 7042331871 ],\n \"samples_ts\": [ 72.5549, 72.7437, 72.7032 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 1B Q2_K - Medium\",\n \"model_size\": 683281408,\n \"model_n_params\": 999885952,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:55:50Z\",\n \"avg_ns\": 35478633446,\n \"stddev_ns\": 116109447,\n \"avg_ts\": 14.431324,\n \"stddev_ts\": 0.047152,\n \"samples_ns\": [ 35392342868, 35610642791, 35432914680 ],\n \"samples_ts\": [ 14.4664, 14.3777, 14.4498 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T15:55:22Z", + "avg_ns": 7045823132, + "stddev_ns": 9642604, + "avg_ts": 72.667257, + "stddev_ts": 0.099385, + "samples_ns": [ + 7056725080, + 7038412445, + 7042331871 + ], + "samples_ts": [ + 72.5549, + 72.7437, + 72.7032 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_type": "gemma3 1B Q2_K - Medium", + "model_size": 683281408, + "model_n_params": 999885952, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T15:55:50Z", + "avg_ns": 35478633446, + "stddev_ns": 116109447, + "avg_ts": 14.431324, + "stddev_ts": 0.047152, + "samples_ns": [ + 35392342868, + 35610642791, + 35432914680 + ], + "samples_ts": [ + 14.4664, + 14.3777, + 14.4498 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-1B-it-GGUF/gemma-3-1b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-1B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 863 + }, + { + "timestamp_utc": "2025-12-09T16:02:09.439255+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:58:18Z\",\n \"avg_ns\": 23926395102,\n \"stddev_ns\": 18874196,\n \"avg_ts\": 5.349743,\n \"stddev_ts\": 0.004218,\n \"samples_ns\": [ 23948114526, 23917095917, 23913974863 ],\n \"samples_ts\": [ 5.34489, 5.35182, 5.35252 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T15:59:55Z\",\n \"avg_ns\": 44276647991,\n \"stddev_ns\": 64056405,\n \"avg_ts\": 2.890918,\n \"stddev_ts\": 0.004179,\n \"samples_ns\": [ 44350594250, 44238196274, 44241153449 ],\n \"samples_ts\": [ 2.88609, 2.89343, 2.89323 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T15:58:18Z", + "avg_ns": 23926395102, + "stddev_ns": 18874196, + "avg_ts": 5.349743, + "stddev_ts": 0.004218, + "samples_ns": [ + 23948114526, + 23917095917, + 23913974863 + ], + "samples_ts": [ + 5.34489, + 5.35182, + 5.35252 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T15:59:55Z", + "avg_ns": 44276647991, + "stddev_ns": 64056405, + "avg_ts": 2.890918, + "stddev_ts": 0.004179, + "samples_ns": [ + 44350594250, + 44238196274, + 44241153449 + ], + "samples_ts": [ + 2.88609, + 2.89343, + 2.89323 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 864 + }, + { + "timestamp_utc": "2025-12-09T16:12:46.971392+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T16:02:11Z\",\n \"avg_ns\": 23938610654,\n \"stddev_ns\": 14340034,\n \"avg_ts\": 5.347012,\n \"stddev_ts\": 0.003202,\n \"samples_ns\": [ 23955168908, 23930400565, 23930262489 ],\n \"samples_ts\": [ 5.34331, 5.34884, 5.34888 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T16:03:47Z\",\n \"avg_ns\": 179642013862,\n \"stddev_ns\": 21803139,\n \"avg_ts\": 2.850113,\n \"stddev_ts\": 0.000346,\n \"samples_ns\": [ 179665819101, 179637195041, 179623027445 ],\n \"samples_ts\": [ 2.84974, 2.85019, 2.85041 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T16:02:11Z", + "avg_ns": 23938610654, + "stddev_ns": 14340034, + "avg_ts": 5.347012, + "stddev_ts": 0.003202, + "samples_ns": [ + 23955168908, + 23930400565, + 23930262489 + ], + "samples_ts": [ + 5.34331, + 5.34884, + 5.34888 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T16:03:47Z", + "avg_ns": 179642013862, + "stddev_ns": 21803139, + "avg_ts": 2.850113, + "stddev_ts": 0.000346, + "samples_ns": [ + 179665819101, + 179637195041, + 179623027445 + ], + "samples_ts": [ + 2.84974, + 2.85019, + 2.85041 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 865 + }, + { + "timestamp_utc": "2025-12-09T16:21:27.748883+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T16:12:49Z\",\n \"avg_ns\": 96307655341,\n \"stddev_ns\": 4121778,\n \"avg_ts\": 5.316296,\n \"stddev_ts\": 0.000227,\n \"samples_ns\": [ 96303702965, 96311906786, 96307356273 ],\n \"samples_ts\": [ 5.31651, 5.31606, 5.31631 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T16:19:14Z\",\n \"avg_ns\": 44278049516,\n \"stddev_ns\": 11405763,\n \"avg_ts\": 2.890823,\n \"stddev_ts\": 0.000745,\n \"samples_ns\": [ 44291030235, 44273487143, 44269631170 ],\n \"samples_ts\": [ 2.88998, 2.89112, 2.89137 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T16:12:49Z", + "avg_ns": 96307655341, + "stddev_ns": 4121778, + "avg_ts": 5.316296, + "stddev_ts": 0.000227, + "samples_ns": [ + 96303702965, + 96311906786, + 96307356273 + ], + "samples_ts": [ + 5.31651, + 5.31606, + 5.31631 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T16:19:14Z", + "avg_ns": 44278049516, + "stddev_ns": 11405763, + "avg_ts": 2.890823, + "stddev_ts": 0.000745, + "samples_ns": [ + 44291030235, + 44273487143, + 44269631170 + ], + "samples_ts": [ + 2.88998, + 2.89112, + 2.89137 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 866 + }, + { + "timestamp_utc": "2025-12-09T16:36:53.280553+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T16:21:29Z\",\n \"avg_ns\": 96287116916,\n \"stddev_ns\": 1088177,\n \"avg_ts\": 5.317430,\n \"stddev_ts\": 0.000058,\n \"samples_ns\": [ 96288229213, 96286160844, 96286960692 ],\n \"samples_ts\": [ 5.31737, 5.31748, 5.31744 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T16:27:55Z\",\n \"avg_ns\": 179245323571,\n \"stddev_ns\": 7704227,\n \"avg_ts\": 2.856420,\n \"stddev_ts\": 0.000123,\n \"samples_ns\": [ 179254217236, 179240697417, 179241056060 ],\n \"samples_ts\": [ 2.85628, 2.85649, 2.85649 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T16:21:29Z", + "avg_ns": 96287116916, + "stddev_ns": 1088177, + "avg_ts": 5.31743, + "stddev_ts": 5.8e-05, + "samples_ns": [ + 96288229213, + 96286160844, + 96286960692 + ], + "samples_ts": [ + 5.31737, + 5.31748, + 5.31744 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T16:27:55Z", + "avg_ns": 179245323571, + "stddev_ns": 7704227, + "avg_ts": 2.85642, + "stddev_ts": 0.000123, + "samples_ns": [ + 179254217236, + 179240697417, + 179241056060 + ], + "samples_ts": [ + 2.85628, + 2.85649, + 2.85649 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 867 + }, + { + "timestamp_utc": "2025-12-09T16:40:44.442174+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T16:36:55Z\",\n \"avg_ns\": 23941538797,\n \"stddev_ns\": 2130542,\n \"avg_ts\": 5.346356,\n \"stddev_ts\": 0.000476,\n \"samples_ns\": [ 23943785366, 23939547235, 23941283790 ],\n \"samples_ts\": [ 5.34585, 5.3468, 5.34641 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T16:38:31Z\",\n \"avg_ns\": 44257880830,\n \"stddev_ns\": 5039899,\n \"avg_ts\": 2.892140,\n \"stddev_ts\": 0.000329,\n \"samples_ns\": [ 44263165155, 44257339445, 44253137891 ],\n \"samples_ts\": [ 2.8918, 2.89218, 2.89245 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T16:36:55Z", + "avg_ns": 23941538797, + "stddev_ns": 2130542, + "avg_ts": 5.346356, + "stddev_ts": 0.000476, + "samples_ns": [ + 23943785366, + 23939547235, + 23941283790 + ], + "samples_ts": [ + 5.34585, + 5.3468, + 5.34641 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T16:38:31Z", + "avg_ns": 44257880830, + "stddev_ns": 5039899, + "avg_ts": 2.89214, + "stddev_ts": 0.000329, + "samples_ns": [ + 44263165155, + 44257339445, + 44253137891 + ], + "samples_ts": [ + 2.8918, + 2.89218, + 2.89245 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 868 + }, + { + "timestamp_utc": "2025-12-09T16:51:21.010810+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T16:40:46Z\",\n \"avg_ns\": 23913470593,\n \"stddev_ns\": 902287,\n \"avg_ts\": 5.352632,\n \"stddev_ts\": 0.000196,\n \"samples_ns\": [ 23914423866, 23912702841, 23913285074 ],\n \"samples_ts\": [ 5.35242, 5.3528, 5.35267 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T16:42:22Z\",\n \"avg_ns\": 179434857419,\n \"stddev_ns\": 4160455,\n \"avg_ts\": 2.853403,\n \"stddev_ts\": 0.000066,\n \"samples_ns\": [ 179439017766, 179430740436, 179434814056 ],\n \"samples_ts\": [ 2.85334, 2.85347, 2.8534 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T16:40:46Z", + "avg_ns": 23913470593, + "stddev_ns": 902287, + "avg_ts": 5.352632, + "stddev_ts": 0.000196, + "samples_ns": [ + 23914423866, + 23912702841, + 23913285074 + ], + "samples_ts": [ + 5.35242, + 5.3528, + 5.35267 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T16:42:22Z", + "avg_ns": 179434857419, + "stddev_ns": 4160455, + "avg_ts": 2.853403, + "stddev_ts": 6.6e-05, + "samples_ns": [ + 179439017766, + 179430740436, + 179434814056 + ], + "samples_ts": [ + 2.85334, + 2.85347, + 2.8534 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 869 + }, + { + "timestamp_utc": "2025-12-09T17:00:01.975808+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T16:51:23Z\",\n \"avg_ns\": 96407592480,\n \"stddev_ns\": 1112095,\n \"avg_ts\": 5.310785,\n \"stddev_ts\": 0.000059,\n \"samples_ns\": [ 96408342683, 96406369885, 96408064873 ],\n \"samples_ts\": [ 5.31074, 5.31085, 5.31076 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T16:57:48Z\",\n \"avg_ns\": 44233231529,\n \"stddev_ns\": 8757924,\n \"avg_ts\": 2.893752,\n \"stddev_ts\": 0.000573,\n \"samples_ns\": [ 44243336134, 44227896856, 44228461598 ],\n \"samples_ts\": [ 2.89309, 2.8941, 2.89406 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T16:51:23Z", + "avg_ns": 96407592480, + "stddev_ns": 1112095, + "avg_ts": 5.310785, + "stddev_ts": 5.9e-05, + "samples_ns": [ + 96408342683, + 96406369885, + 96408064873 + ], + "samples_ts": [ + 5.31074, + 5.31085, + 5.31076 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T16:57:48Z", + "avg_ns": 44233231529, + "stddev_ns": 8757924, + "avg_ts": 2.893752, + "stddev_ts": 0.000573, + "samples_ns": [ + 44243336134, + 44227896856, + 44228461598 + ], + "samples_ts": [ + 2.89309, + 2.8941, + 2.89406 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 870 + }, + { + "timestamp_utc": "2025-12-09T17:15:27.833535+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T17:00:04Z\",\n \"avg_ns\": 96279076329,\n \"stddev_ns\": 884432,\n \"avg_ts\": 5.317874,\n \"stddev_ts\": 0.000046,\n \"samples_ns\": [ 96279371218, 96278141026, 96279716744 ],\n \"samples_ts\": [ 5.31786, 5.31793, 5.31784 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T17:06:29Z\",\n \"avg_ns\": 179368379185,\n \"stddev_ns\": 11032859,\n \"avg_ts\": 2.854461,\n \"stddev_ts\": 0.000175,\n \"samples_ns\": [ 179356672390, 179378542856, 179369922311 ],\n \"samples_ts\": [ 2.85465, 2.8543, 2.85444 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T17:00:04Z", + "avg_ns": 96279076329, + "stddev_ns": 884432, + "avg_ts": 5.317874, + "stddev_ts": 4.6e-05, + "samples_ns": [ + 96279371218, + 96278141026, + 96279716744 + ], + "samples_ts": [ + 5.31786, + 5.31793, + 5.31784 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T17:06:29Z", + "avg_ns": 179368379185, + "stddev_ns": 11032859, + "avg_ts": 2.854461, + "stddev_ts": 0.000175, + "samples_ns": [ + 179356672390, + 179378542856, + 179369922311 + ], + "samples_ts": [ + 2.85465, + 2.8543, + 2.85444 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 871 + }, + { + "timestamp_utc": "2025-12-09T17:19:18.918909+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T17:15:29Z\",\n \"avg_ns\": 23936372454,\n \"stddev_ns\": 2854056,\n \"avg_ts\": 5.347510,\n \"stddev_ts\": 0.000638,\n \"samples_ns\": [ 23939191016, 23936442166, 23933484180 ],\n \"samples_ts\": [ 5.34688, 5.34749, 5.34816 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T17:17:05Z\",\n \"avg_ns\": 44243159543,\n \"stddev_ns\": 3272081,\n \"avg_ts\": 2.893103,\n \"stddev_ts\": 0.000214,\n \"samples_ns\": [ 44246913549, 44240912278, 44241652802 ],\n \"samples_ts\": [ 2.89286, 2.89325, 2.8932 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T17:15:29Z", + "avg_ns": 23936372454, + "stddev_ns": 2854056, + "avg_ts": 5.34751, + "stddev_ts": 0.000638, + "samples_ns": [ + 23939191016, + 23936442166, + 23933484180 + ], + "samples_ts": [ + 5.34688, + 5.34749, + 5.34816 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T17:17:05Z", + "avg_ns": 44243159543, + "stddev_ns": 3272081, + "avg_ts": 2.893103, + "stddev_ts": 0.000214, + "samples_ns": [ + 44246913549, + 44240912278, + 44241652802 + ], + "samples_ts": [ + 2.89286, + 2.89325, + 2.8932 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 872 + }, + { + "timestamp_utc": "2025-12-09T17:29:55.933748+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T17:19:21Z\",\n \"avg_ns\": 23936516894,\n \"stddev_ns\": 1152960,\n \"avg_ts\": 5.347478,\n \"stddev_ts\": 0.000255,\n \"samples_ns\": [ 23935702891, 23937823024, 23936024768 ],\n \"samples_ts\": [ 5.34766, 5.34719, 5.34759 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T17:20:56Z\",\n \"avg_ns\": 179547397809,\n \"stddev_ns\": 11551865,\n \"avg_ts\": 2.851615,\n \"stddev_ts\": 0.000183,\n \"samples_ns\": [ 179560337372, 179538187173, 179543668884 ],\n \"samples_ts\": [ 2.85141, 2.85176, 2.85167 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T17:19:21Z", + "avg_ns": 23936516894, + "stddev_ns": 1152960, + "avg_ts": 5.347478, + "stddev_ts": 0.000255, + "samples_ns": [ + 23935702891, + 23937823024, + 23936024768 + ], + "samples_ts": [ + 5.34766, + 5.34719, + 5.34759 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T17:20:56Z", + "avg_ns": 179547397809, + "stddev_ns": 11551865, + "avg_ts": 2.851615, + "stddev_ts": 0.000183, + "samples_ns": [ + 179560337372, + 179538187173, + 179543668884 + ], + "samples_ts": [ + 2.85141, + 2.85176, + 2.85167 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 873 + }, + { + "timestamp_utc": "2025-12-09T17:38:40.742857+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T17:29:58Z\",\n \"avg_ns\": 97258851212,\n \"stddev_ns\": 2753731,\n \"avg_ts\": 5.264302,\n \"stddev_ts\": 0.000147,\n \"samples_ns\": [ 97256104122, 97258909981, 97261539535 ],\n \"samples_ts\": [ 5.26445, 5.2643, 5.26416 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T17:36:27Z\",\n \"avg_ns\": 44385037147,\n \"stddev_ns\": 13975489,\n \"avg_ts\": 2.883855,\n \"stddev_ts\": 0.000908,\n \"samples_ns\": [ 44401022736, 44378957469, 44375131236 ],\n \"samples_ts\": [ 2.88282, 2.88425, 2.8845 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T17:29:58Z", + "avg_ns": 97258851212, + "stddev_ns": 2753731, + "avg_ts": 5.264302, + "stddev_ts": 0.000147, + "samples_ns": [ + 97256104122, + 97258909981, + 97261539535 + ], + "samples_ts": [ + 5.26445, + 5.2643, + 5.26416 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T17:36:27Z", + "avg_ns": 44385037147, + "stddev_ns": 13975489, + "avg_ts": 2.883855, + "stddev_ts": 0.000908, + "samples_ns": [ + 44401022736, + 44378957469, + 44375131236 + ], + "samples_ts": [ + 2.88282, + 2.88425, + 2.8845 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 874 + }, + { + "timestamp_utc": "2025-12-09T17:54:09.682316+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T17:38:42Z\",\n \"avg_ns\": 97299134617,\n \"stddev_ns\": 2169128,\n \"avg_ts\": 5.262123,\n \"stddev_ts\": 0.000115,\n \"samples_ns\": [ 97296988407, 97299180174, 97301235272 ],\n \"samples_ts\": [ 5.26224, 5.26212, 5.26201 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T17:45:12Z\",\n \"avg_ns\": 179018705307,\n \"stddev_ns\": 9025117,\n \"avg_ts\": 2.860036,\n \"stddev_ts\": 0.000144,\n \"samples_ns\": [ 179027769424, 179018586417, 179009760082 ],\n \"samples_ts\": [ 2.85989, 2.86004, 2.86018 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T17:38:42Z", + "avg_ns": 97299134617, + "stddev_ns": 2169128, + "avg_ts": 5.262123, + "stddev_ts": 0.000115, + "samples_ns": [ + 97296988407, + 97299180174, + 97301235272 + ], + "samples_ts": [ + 5.26224, + 5.26212, + 5.26201 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T17:45:12Z", + "avg_ns": 179018705307, + "stddev_ns": 9025117, + "avg_ts": 2.860036, + "stddev_ts": 0.000144, + "samples_ns": [ + 179027769424, + 179018586417, + 179009760082 + ], + "samples_ts": [ + 2.85989, + 2.86004, + 2.86018 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 875 + }, + { + "timestamp_utc": "2025-12-09T17:58:00.918224+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T17:54:11Z\",\n \"avg_ns\": 23928868231,\n \"stddev_ns\": 4487698,\n \"avg_ts\": 5.349188,\n \"stddev_ts\": 0.001003,\n \"samples_ns\": [ 23933987097, 23927006899, 23925610697 ],\n \"samples_ts\": [ 5.34804, 5.3496, 5.34992 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T17:55:47Z\",\n \"avg_ns\": 44287100089,\n \"stddev_ns\": 4685121,\n \"avg_ts\": 2.890232,\n \"stddev_ts\": 0.000305,\n \"samples_ns\": [ 44284277065, 44284526042, 44292497162 ],\n \"samples_ts\": [ 2.89042, 2.8904, 2.88988 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T17:54:11Z", + "avg_ns": 23928868231, + "stddev_ns": 4487698, + "avg_ts": 5.349188, + "stddev_ts": 0.001003, + "samples_ns": [ + 23933987097, + 23927006899, + 23925610697 + ], + "samples_ts": [ + 5.34804, + 5.3496, + 5.34992 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T17:55:47Z", + "avg_ns": 44287100089, + "stddev_ns": 4685121, + "avg_ts": 2.890232, + "stddev_ts": 0.000305, + "samples_ns": [ + 44284277065, + 44284526042, + 44292497162 + ], + "samples_ts": [ + 2.89042, + 2.8904, + 2.88988 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 876 + }, + { + "timestamp_utc": "2025-12-09T18:08:36.701845+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T17:58:03Z\",\n \"avg_ns\": 23931422717,\n \"stddev_ns\": 7527147,\n \"avg_ts\": 5.348617,\n \"stddev_ts\": 0.001682,\n \"samples_ns\": [ 23940112427, 23927050967, 23927104758 ],\n \"samples_ts\": [ 5.34667, 5.34959, 5.34958 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T17:59:38Z\",\n \"avg_ns\": 179151424008,\n \"stddev_ns\": 10111328,\n \"avg_ts\": 2.857918,\n \"stddev_ts\": 0.000161,\n \"samples_ns\": [ 179162298584, 179149667298, 179142306142 ],\n \"samples_ts\": [ 2.85774, 2.85795, 2.85806 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T17:58:03Z", + "avg_ns": 23931422717, + "stddev_ns": 7527147, + "avg_ts": 5.348617, + "stddev_ts": 0.001682, + "samples_ns": [ + 23940112427, + 23927050967, + 23927104758 + ], + "samples_ts": [ + 5.34667, + 5.34959, + 5.34958 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T17:59:38Z", + "avg_ns": 179151424008, + "stddev_ns": 10111328, + "avg_ts": 2.857918, + "stddev_ts": 0.000161, + "samples_ns": [ + 179162298584, + 179149667298, + 179142306142 + ], + "samples_ts": [ + 2.85774, + 2.85795, + 2.85806 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 877 + }, + { + "timestamp_utc": "2025-12-09T18:17:17.359302+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T18:08:38Z\",\n \"avg_ns\": 96291549717,\n \"stddev_ns\": 2131343,\n \"avg_ts\": 5.317185,\n \"stddev_ts\": 0.000118,\n \"samples_ns\": [ 96292122241, 96289190586, 96293336324 ],\n \"samples_ts\": [ 5.31715, 5.31732, 5.31709 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T18:15:04Z\",\n \"avg_ns\": 44237674951,\n \"stddev_ns\": 10785292,\n \"avg_ts\": 2.893461,\n \"stddev_ts\": 0.000705,\n \"samples_ns\": [ 44249969803, 44229809972, 44233245078 ],\n \"samples_ts\": [ 2.89266, 2.89398, 2.89375 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T18:08:38Z", + "avg_ns": 96291549717, + "stddev_ns": 2131343, + "avg_ts": 5.317185, + "stddev_ts": 0.000118, + "samples_ns": [ + 96292122241, + 96289190586, + 96293336324 + ], + "samples_ts": [ + 5.31715, + 5.31732, + 5.31709 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T18:15:04Z", + "avg_ns": 44237674951, + "stddev_ns": 10785292, + "avg_ts": 2.893461, + "stddev_ts": 0.000705, + "samples_ns": [ + 44249969803, + 44229809972, + 44233245078 + ], + "samples_ts": [ + 2.89266, + 2.89398, + 2.89375 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 878 + }, + { + "timestamp_utc": "2025-12-09T18:32:42.515778+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T18:17:19Z\",\n \"avg_ns\": 96275209883,\n \"stddev_ns\": 1905201,\n \"avg_ts\": 5.318088,\n \"stddev_ts\": 0.000102,\n \"samples_ns\": [ 96277334177, 96274377470, 96273918004 ],\n \"samples_ts\": [ 5.31797, 5.31813, 5.31816 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T18:23:44Z\",\n \"avg_ns\": 179128494829,\n \"stddev_ns\": 13519931,\n \"avg_ts\": 2.858283,\n \"stddev_ts\": 0.000216,\n \"samples_ns\": [ 179143706627, 179117908794, 179123869068 ],\n \"samples_ts\": [ 2.85804, 2.85845, 2.85836 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T18:17:19Z", + "avg_ns": 96275209883, + "stddev_ns": 1905201, + "avg_ts": 5.318088, + "stddev_ts": 0.000102, + "samples_ns": [ + 96277334177, + 96274377470, + 96273918004 + ], + "samples_ts": [ + 5.31797, + 5.31813, + 5.31816 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T18:23:44Z", + "avg_ns": 179128494829, + "stddev_ns": 13519931, + "avg_ts": 2.858283, + "stddev_ts": 0.000216, + "samples_ns": [ + 179143706627, + 179117908794, + 179123869068 + ], + "samples_ts": [ + 2.85804, + 2.85845, + 2.85836 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 879 + }, + { + "timestamp_utc": "2025-12-09T18:36:33.626633+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T18:32:44Z\",\n \"avg_ns\": 23924313064,\n \"stddev_ns\": 1274112,\n \"avg_ts\": 5.350206,\n \"stddev_ts\": 0.000285,\n \"samples_ns\": [ 23925411952, 23922916451, 23924610789 ],\n \"samples_ts\": [ 5.34996, 5.35052, 5.35014 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T18:34:20Z\",\n \"avg_ns\": 44262402842,\n \"stddev_ns\": 1137824,\n \"avg_ts\": 2.891845,\n \"stddev_ts\": 0.000074,\n \"samples_ns\": [ 44262910867, 44263198152, 44261099507 ],\n \"samples_ts\": [ 2.89181, 2.89179, 2.89193 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T18:32:44Z", + "avg_ns": 23924313064, + "stddev_ns": 1274112, + "avg_ts": 5.350206, + "stddev_ts": 0.000285, + "samples_ns": [ + 23925411952, + 23922916451, + 23924610789 + ], + "samples_ts": [ + 5.34996, + 5.35052, + 5.35014 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T18:34:20Z", + "avg_ns": 44262402842, + "stddev_ns": 1137824, + "avg_ts": 2.891845, + "stddev_ts": 7.4e-05, + "samples_ns": [ + 44262910867, + 44263198152, + 44261099507 + ], + "samples_ts": [ + 2.89181, + 2.89179, + 2.89193 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 880 + }, + { + "timestamp_utc": "2025-12-09T18:47:10.254094+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T18:36:35Z\",\n \"avg_ns\": 23933944188,\n \"stddev_ns\": 599202,\n \"avg_ts\": 5.348053,\n \"stddev_ts\": 0.000134,\n \"samples_ns\": [ 23934619514, 23933476159, 23933736891 ],\n \"samples_ts\": [ 5.3479, 5.34816, 5.3481 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T18:38:11Z\",\n \"avg_ns\": 179420545871,\n \"stddev_ns\": 2672446,\n \"avg_ts\": 2.853631,\n \"stddev_ts\": 0.000043,\n \"samples_ns\": [ 179419775788, 179418343018, 179423518807 ],\n \"samples_ts\": [ 2.85364, 2.85367, 2.85358 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T18:36:35Z", + "avg_ns": 23933944188, + "stddev_ns": 599202, + "avg_ts": 5.348053, + "stddev_ts": 0.000134, + "samples_ns": [ + 23934619514, + 23933476159, + 23933736891 + ], + "samples_ts": [ + 5.3479, + 5.34816, + 5.3481 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T18:38:11Z", + "avg_ns": 179420545871, + "stddev_ns": 2672446, + "avg_ts": 2.853631, + "stddev_ts": 4.3e-05, + "samples_ns": [ + 179419775788, + 179418343018, + 179423518807 + ], + "samples_ts": [ + 2.85364, + 2.85367, + 2.85358 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 881 + }, + { + "timestamp_utc": "2025-12-09T18:55:51.598601+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T18:47:12Z\",\n \"avg_ns\": 96463215179,\n \"stddev_ns\": 1902483,\n \"avg_ts\": 5.307723,\n \"stddev_ts\": 0.000102,\n \"samples_ns\": [ 96463875096, 96461124568, 96464645875 ],\n \"samples_ts\": [ 5.30769, 5.30784, 5.30764 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T18:53:38Z\",\n \"avg_ns\": 44285480691,\n \"stddev_ns\": 14571532,\n \"avg_ts\": 2.890338,\n \"stddev_ts\": 0.000951,\n \"samples_ns\": [ 44302289230, 44277700960, 44276451884 ],\n \"samples_ts\": [ 2.88924, 2.89085, 2.89093 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T18:47:12Z", + "avg_ns": 96463215179, + "stddev_ns": 1902483, + "avg_ts": 5.307723, + "stddev_ts": 0.000102, + "samples_ns": [ + 96463875096, + 96461124568, + 96464645875 + ], + "samples_ts": [ + 5.30769, + 5.30784, + 5.30764 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T18:53:38Z", + "avg_ns": 44285480691, + "stddev_ns": 14571532, + "avg_ts": 2.890338, + "stddev_ts": 0.000951, + "samples_ns": [ + 44302289230, + 44277700960, + 44276451884 + ], + "samples_ts": [ + 2.88924, + 2.89085, + 2.89093 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 882 + }, + { + "timestamp_utc": "2025-12-09T19:11:17.283317+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T18:55:53Z\",\n \"avg_ns\": 96432866167,\n \"stddev_ns\": 1408845,\n \"avg_ts\": 5.309393,\n \"stddev_ts\": 0.000076,\n \"samples_ns\": [ 96434426507, 96432335836, 96431836159 ],\n \"samples_ts\": [ 5.30931, 5.30942, 5.30945 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T19:02:19Z\",\n \"avg_ns\": 179099517697,\n \"stddev_ns\": 8301569,\n \"avg_ts\": 2.858746,\n \"stddev_ts\": 0.000132,\n \"samples_ns\": [ 179108842113, 179096685603, 179093025377 ],\n \"samples_ts\": [ 2.8586, 2.85879, 2.85885 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T18:55:53Z", + "avg_ns": 96432866167, + "stddev_ns": 1408845, + "avg_ts": 5.309393, + "stddev_ts": 7.6e-05, + "samples_ns": [ + 96434426507, + 96432335836, + 96431836159 + ], + "samples_ts": [ + 5.30931, + 5.30942, + 5.30945 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T19:02:19Z", + "avg_ns": 179099517697, + "stddev_ns": 8301569, + "avg_ts": 2.858746, + "stddev_ts": 0.000132, + "samples_ns": [ + 179108842113, + 179096685603, + 179093025377 + ], + "samples_ts": [ + 2.8586, + 2.85879, + 2.85885 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 883 + }, + { + "timestamp_utc": "2025-12-09T19:15:08.921198+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T19:11:19Z\",\n \"avg_ns\": 23965394952,\n \"stddev_ns\": 639471,\n \"avg_ts\": 5.341034,\n \"stddev_ts\": 0.000134,\n \"samples_ns\": [ 23965834801, 23965639672, 23964710385 ],\n \"samples_ts\": [ 5.34094, 5.34098, 5.34119 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T19:12:55Z\",\n \"avg_ns\": 44388613908,\n \"stddev_ns\": 7926416,\n \"avg_ts\": 2.883622,\n \"stddev_ts\": 0.000515,\n \"samples_ns\": [ 44397759830, 44384099194, 44383982702 ],\n \"samples_ts\": [ 2.88303, 2.88392, 2.88392 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T19:11:19Z", + "avg_ns": 23965394952, + "stddev_ns": 639471, + "avg_ts": 5.341034, + "stddev_ts": 0.000134, + "samples_ns": [ + 23965834801, + 23965639672, + 23964710385 + ], + "samples_ts": [ + 5.34094, + 5.34098, + 5.34119 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T19:12:55Z", + "avg_ns": 44388613908, + "stddev_ns": 7926416, + "avg_ts": 2.883622, + "stddev_ts": 0.000515, + "samples_ns": [ + 44397759830, + 44384099194, + 44383982702 + ], + "samples_ts": [ + 2.88303, + 2.88392, + 2.88392 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 884 + }, + { + "timestamp_utc": "2025-12-09T19:25:44.582324+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T19:15:11Z\",\n \"avg_ns\": 23915116974,\n \"stddev_ns\": 5617965,\n \"avg_ts\": 5.352263,\n \"stddev_ts\": 0.001257,\n \"samples_ns\": [ 23921591936, 23911573259, 23912185728 ],\n \"samples_ts\": [ 5.35081, 5.35306, 5.35292 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T19:16:46Z\",\n \"avg_ns\": 179124603355,\n \"stddev_ns\": 7901567,\n \"avg_ts\": 2.858345,\n \"stddev_ts\": 0.000126,\n \"samples_ns\": [ 179133704703, 179119496919, 179120608443 ],\n \"samples_ts\": [ 2.8582, 2.85843, 2.85841 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T19:15:11Z", + "avg_ns": 23915116974, + "stddev_ns": 5617965, + "avg_ts": 5.352263, + "stddev_ts": 0.001257, + "samples_ns": [ + 23921591936, + 23911573259, + 23912185728 + ], + "samples_ts": [ + 5.35081, + 5.35306, + 5.35292 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T19:16:46Z", + "avg_ns": 179124603355, + "stddev_ns": 7901567, + "avg_ts": 2.858345, + "stddev_ts": 0.000126, + "samples_ns": [ + 179133704703, + 179119496919, + 179120608443 + ], + "samples_ts": [ + 2.8582, + 2.85843, + 2.85841 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 885 + }, + { + "timestamp_utc": "2025-12-09T19:34:30.399190+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T19:25:46Z\",\n \"avg_ns\": 97464845475,\n \"stddev_ns\": 1251285,\n \"avg_ts\": 5.253176,\n \"stddev_ts\": 0.000063,\n \"samples_ns\": [ 97465147257, 97465835848, 97463553322 ],\n \"samples_ts\": [ 5.25316, 5.25312, 5.25325 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T19:32:16Z\",\n \"avg_ns\": 44394433083,\n \"stddev_ns\": 20545903,\n \"avg_ts\": 2.883245,\n \"stddev_ts\": 0.001334,\n \"samples_ns\": [ 44418116910, 44383791762, 44381390577 ],\n \"samples_ts\": [ 2.88171, 2.88394, 2.88409 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T19:25:46Z", + "avg_ns": 97464845475, + "stddev_ns": 1251285, + "avg_ts": 5.253176, + "stddev_ts": 6.3e-05, + "samples_ns": [ + 97465147257, + 97465835848, + 97463553322 + ], + "samples_ts": [ + 5.25316, + 5.25312, + 5.25325 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T19:32:16Z", + "avg_ns": 44394433083, + "stddev_ns": 20545903, + "avg_ts": 2.883245, + "stddev_ts": 0.001334, + "samples_ns": [ + 44418116910, + 44383791762, + 44381390577 + ], + "samples_ts": [ + 2.88171, + 2.88394, + 2.88409 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 886 + }, + { + "timestamp_utc": "2025-12-09T19:50:00.773723+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T19:34:32Z\",\n \"avg_ns\": 97344571814,\n \"stddev_ns\": 2672130,\n \"avg_ts\": 5.259667,\n \"stddev_ts\": 0.000143,\n \"samples_ns\": [ 97341884271, 97347190642, 97344640530 ],\n \"samples_ts\": [ 5.25981, 5.25953, 5.25966 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T19:41:01Z\",\n \"avg_ns\": 179433467410,\n \"stddev_ns\": 21511803,\n \"avg_ts\": 2.853425,\n \"stddev_ts\": 0.000342,\n \"samples_ns\": [ 179458300939, 179421274576, 179420826716 ],\n \"samples_ts\": [ 2.85303, 2.85362, 2.85363 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T19:34:32Z", + "avg_ns": 97344571814, + "stddev_ns": 2672130, + "avg_ts": 5.259667, + "stddev_ts": 0.000143, + "samples_ns": [ + 97341884271, + 97347190642, + 97344640530 + ], + "samples_ts": [ + 5.25981, + 5.25953, + 5.25966 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T19:41:01Z", + "avg_ns": 179433467410, + "stddev_ns": 21511803, + "avg_ts": 2.853425, + "stddev_ts": 0.000342, + "samples_ns": [ + 179458300939, + 179421274576, + 179420826716 + ], + "samples_ts": [ + 2.85303, + 2.85362, + 2.85363 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 887 + }, + { + "timestamp_utc": "2025-12-09T19:53:51.940544+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T19:50:02Z\",\n \"avg_ns\": 23941382358,\n \"stddev_ns\": 7506551,\n \"avg_ts\": 5.346392,\n \"stddev_ts\": 0.001676,\n \"samples_ns\": [ 23949770066, 23939081408, 23935295600 ],\n \"samples_ts\": [ 5.34452, 5.34691, 5.34775 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T19:51:38Z\",\n \"avg_ns\": 44253930427,\n \"stddev_ns\": 4735630,\n \"avg_ts\": 2.892398,\n \"stddev_ts\": 0.000310,\n \"samples_ns\": [ 44259279769, 44252237898, 44250273614 ],\n \"samples_ts\": [ 2.89205, 2.89251, 2.89264 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T19:50:02Z", + "avg_ns": 23941382358, + "stddev_ns": 7506551, + "avg_ts": 5.346392, + "stddev_ts": 0.001676, + "samples_ns": [ + 23949770066, + 23939081408, + 23935295600 + ], + "samples_ts": [ + 5.34452, + 5.34691, + 5.34775 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T19:51:38Z", + "avg_ns": 44253930427, + "stddev_ns": 4735630, + "avg_ts": 2.892398, + "stddev_ts": 0.00031, + "samples_ns": [ + 44259279769, + 44252237898, + 44250273614 + ], + "samples_ts": [ + 2.89205, + 2.89251, + 2.89264 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 888 + }, + { + "timestamp_utc": "2025-12-09T20:04:27.503885+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T19:53:54Z\",\n \"avg_ns\": 23908802780,\n \"stddev_ns\": 2135412,\n \"avg_ts\": 5.353677,\n \"stddev_ts\": 0.000478,\n \"samples_ns\": [ 23911117477, 23908381380, 23906909483 ],\n \"samples_ts\": [ 5.35316, 5.35377, 5.3541 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T19:55:29Z\",\n \"avg_ns\": 179099443408,\n \"stddev_ns\": 4277748,\n \"avg_ts\": 2.858747,\n \"stddev_ts\": 0.000068,\n \"samples_ns\": [ 179104357756, 179097069436, 179096903033 ],\n \"samples_ts\": [ 2.85867, 2.85878, 2.85879 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T19:53:54Z", + "avg_ns": 23908802780, + "stddev_ns": 2135412, + "avg_ts": 5.353677, + "stddev_ts": 0.000478, + "samples_ns": [ + 23911117477, + 23908381380, + 23906909483 + ], + "samples_ts": [ + 5.35316, + 5.35377, + 5.3541 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T19:55:29Z", + "avg_ns": 179099443408, + "stddev_ns": 4277748, + "avg_ts": 2.858747, + "stddev_ts": 6.8e-05, + "samples_ns": [ + 179104357756, + 179097069436, + 179096903033 + ], + "samples_ts": [ + 2.85867, + 2.85878, + 2.85879 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 889 + }, + { + "timestamp_utc": "2025-12-09T20:13:08.270670+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T20:04:29Z\",\n \"avg_ns\": 96289744323,\n \"stddev_ns\": 2827684,\n \"avg_ts\": 5.317285,\n \"stddev_ts\": 0.000156,\n \"samples_ns\": [ 96292672109, 96289532170, 96287028690 ],\n \"samples_ts\": [ 5.31712, 5.3173, 5.31743 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T20:10:54Z\",\n \"avg_ns\": 44325419391,\n \"stddev_ns\": 6799612,\n \"avg_ts\": 2.887734,\n \"stddev_ts\": 0.000443,\n \"samples_ns\": [ 44333208532, 44322353898, 44320695744 ],\n \"samples_ts\": [ 2.88723, 2.88793, 2.88804 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T20:04:29Z", + "avg_ns": 96289744323, + "stddev_ns": 2827684, + "avg_ts": 5.317285, + "stddev_ts": 0.000156, + "samples_ns": [ + 96292672109, + 96289532170, + 96287028690 + ], + "samples_ts": [ + 5.31712, + 5.3173, + 5.31743 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T20:10:54Z", + "avg_ns": 44325419391, + "stddev_ns": 6799612, + "avg_ts": 2.887734, + "stddev_ts": 0.000443, + "samples_ns": [ + 44333208532, + 44322353898, + 44320695744 + ], + "samples_ts": [ + 2.88723, + 2.88793, + 2.88804 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 890 + }, + { + "timestamp_utc": "2025-12-09T20:28:32.932699+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T20:13:10Z\",\n \"avg_ns\": 96254868033,\n \"stddev_ns\": 2519135,\n \"avg_ts\": 5.319211,\n \"stddev_ts\": 0.000138,\n \"samples_ns\": [ 96256370081, 96256251882, 96251982137 ],\n \"samples_ts\": [ 5.31913, 5.31914, 5.31937 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T20:19:35Z\",\n \"avg_ns\": 179006076437,\n \"stddev_ns\": 12168347,\n \"avg_ts\": 2.860238,\n \"stddev_ts\": 0.000194,\n \"samples_ns\": [ 179018909386, 179004615288, 178994704637 ],\n \"samples_ts\": [ 2.86003, 2.86026, 2.86042 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T20:13:10Z", + "avg_ns": 96254868033, + "stddev_ns": 2519135, + "avg_ts": 5.319211, + "stddev_ts": 0.000138, + "samples_ns": [ + 96256370081, + 96256251882, + 96251982137 + ], + "samples_ts": [ + 5.31913, + 5.31914, + 5.31937 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T20:19:35Z", + "avg_ns": 179006076437, + "stddev_ns": 12168347, + "avg_ts": 2.860238, + "stddev_ts": 0.000194, + "samples_ns": [ + 179018909386, + 179004615288, + 178994704637 + ], + "samples_ts": [ + 2.86003, + 2.86026, + 2.86042 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 891 + }, + { + "timestamp_utc": "2025-12-09T20:32:24.114942+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T20:28:35Z\",\n \"avg_ns\": 23923617491,\n \"stddev_ns\": 1839369,\n \"avg_ts\": 5.350361,\n \"stddev_ts\": 0.000410,\n \"samples_ns\": [ 23925648343, 23922086227, 23923117904 ],\n \"samples_ts\": [ 5.34991, 5.3507, 5.35047 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T20:30:10Z\",\n \"avg_ns\": 44268575326,\n \"stddev_ns\": 7524805,\n \"avg_ts\": 2.891442,\n \"stddev_ts\": 0.000491,\n \"samples_ns\": [ 44277247437, 44264599872, 44263878671 ],\n \"samples_ts\": [ 2.89088, 2.8917, 2.89175 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T20:28:35Z", + "avg_ns": 23923617491, + "stddev_ns": 1839369, + "avg_ts": 5.350361, + "stddev_ts": 0.00041, + "samples_ns": [ + 23925648343, + 23922086227, + 23923117904 + ], + "samples_ts": [ + 5.34991, + 5.3507, + 5.35047 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T20:30:10Z", + "avg_ns": 44268575326, + "stddev_ns": 7524805, + "avg_ts": 2.891442, + "stddev_ts": 0.000491, + "samples_ns": [ + 44277247437, + 44264599872, + 44263878671 + ], + "samples_ts": [ + 2.89088, + 2.8917, + 2.89175 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 892 + }, + { + "timestamp_utc": "2025-12-09T20:43:01.491121+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T20:32:26Z\",\n \"avg_ns\": 23904101576,\n \"stddev_ns\": 1212607,\n \"avg_ts\": 5.354730,\n \"stddev_ts\": 0.000272,\n \"samples_ns\": [ 23903591246, 23905485940, 23903227542 ],\n \"samples_ts\": [ 5.35484, 5.35442, 5.35493 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T20:34:01Z\",\n \"avg_ns\": 179712890662,\n \"stddev_ns\": 5151225,\n \"avg_ts\": 2.848989,\n \"stddev_ts\": 0.000081,\n \"samples_ns\": [ 179718796259, 179710074424, 179709801305 ],\n \"samples_ts\": [ 2.8489, 2.84903, 2.84904 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T20:32:26Z", + "avg_ns": 23904101576, + "stddev_ns": 1212607, + "avg_ts": 5.35473, + "stddev_ts": 0.000272, + "samples_ns": [ + 23903591246, + 23905485940, + 23903227542 + ], + "samples_ts": [ + 5.35484, + 5.35442, + 5.35493 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T20:34:01Z", + "avg_ns": 179712890662, + "stddev_ns": 5151225, + "avg_ts": 2.848989, + "stddev_ts": 8.1e-05, + "samples_ns": [ + 179718796259, + 179710074424, + 179709801305 + ], + "samples_ts": [ + 2.8489, + 2.84903, + 2.84904 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 893 + }, + { + "timestamp_utc": "2025-12-09T20:51:43.124267+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T20:43:03Z\",\n \"avg_ns\": 96422990822,\n \"stddev_ns\": 4075574,\n \"avg_ts\": 5.309937,\n \"stddev_ts\": 0.000224,\n \"samples_ns\": [ 96427353749, 96422337089, 96419281628 ],\n \"samples_ts\": [ 5.3097, 5.30997, 5.31014 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T20:49:29Z\",\n \"avg_ns\": 44427115407,\n \"stddev_ns\": 7583801,\n \"avg_ts\": 2.881123,\n \"stddev_ts\": 0.000492,\n \"samples_ns\": [ 44435853167, 44423249222, 44422243832 ],\n \"samples_ts\": [ 2.88056, 2.88137, 2.88144 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T20:43:03Z", + "avg_ns": 96422990822, + "stddev_ns": 4075574, + "avg_ts": 5.309937, + "stddev_ts": 0.000224, + "samples_ns": [ + 96427353749, + 96422337089, + 96419281628 + ], + "samples_ts": [ + 5.3097, + 5.30997, + 5.31014 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T20:49:29Z", + "avg_ns": 44427115407, + "stddev_ns": 7583801, + "avg_ts": 2.881123, + "stddev_ts": 0.000492, + "samples_ns": [ + 44435853167, + 44423249222, + 44422243832 + ], + "samples_ts": [ + 2.88056, + 2.88137, + 2.88144 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 894 + }, + { + "timestamp_utc": "2025-12-09T21:07:08.564231+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T20:51:45Z\",\n \"avg_ns\": 96356818023,\n \"stddev_ns\": 607956,\n \"avg_ts\": 5.313584,\n \"stddev_ts\": 0.000029,\n \"samples_ns\": [ 96357299439, 96356892646, 96356261985 ],\n \"samples_ts\": [ 5.31356, 5.31358, 5.31361 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T20:58:10Z\",\n \"avg_ns\": 179127190269,\n \"stddev_ns\": 12429645,\n \"avg_ts\": 2.858304,\n \"stddev_ts\": 0.000198,\n \"samples_ns\": [ 179141430783, 179121560916, 179118579109 ],\n \"samples_ts\": [ 2.85808, 2.85839, 2.85844 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T20:51:45Z", + "avg_ns": 96356818023, + "stddev_ns": 607956, + "avg_ts": 5.313584, + "stddev_ts": 2.9e-05, + "samples_ns": [ + 96357299439, + 96356892646, + 96356261985 + ], + "samples_ts": [ + 5.31356, + 5.31358, + 5.31361 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T20:58:10Z", + "avg_ns": 179127190269, + "stddev_ns": 12429645, + "avg_ts": 2.858304, + "stddev_ts": 0.000198, + "samples_ns": [ + 179141430783, + 179121560916, + 179118579109 + ], + "samples_ts": [ + 2.85808, + 2.85839, + 2.85844 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 895 + }, + { + "timestamp_utc": "2025-12-09T21:11:00.216323+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T21:07:10Z\",\n \"avg_ns\": 23966646809,\n \"stddev_ns\": 3269832,\n \"avg_ts\": 5.340756,\n \"stddev_ts\": 0.000728,\n \"samples_ns\": [ 23970418072, 23964793152, 23964729204 ],\n \"samples_ts\": [ 5.33992, 5.34117, 5.34118 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T21:08:46Z\",\n \"avg_ns\": 44386621865,\n \"stddev_ns\": 2605196,\n \"avg_ts\": 2.883752,\n \"stddev_ts\": 0.000168,\n \"samples_ns\": [ 44388648347, 44387510809, 44383706441 ],\n \"samples_ts\": [ 2.88362, 2.88369, 2.88394 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T21:07:10Z", + "avg_ns": 23966646809, + "stddev_ns": 3269832, + "avg_ts": 5.340756, + "stddev_ts": 0.000728, + "samples_ns": [ + 23970418072, + 23964793152, + 23964729204 + ], + "samples_ts": [ + 5.33992, + 5.34117, + 5.34118 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T21:08:46Z", + "avg_ns": 44386621865, + "stddev_ns": 2605196, + "avg_ts": 2.883752, + "stddev_ts": 0.000168, + "samples_ns": [ + 44388648347, + 44387510809, + 44383706441 + ], + "samples_ts": [ + 2.88362, + 2.88369, + 2.88394 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 896 + }, + { + "timestamp_utc": "2025-12-09T21:21:35.972928+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T21:11:02Z\",\n \"avg_ns\": 23940373764,\n \"stddev_ns\": 5355864,\n \"avg_ts\": 5.346617,\n \"stddev_ts\": 0.001196,\n \"samples_ns\": [ 23946524904, 23937853079, 23936743309 ],\n \"samples_ts\": [ 5.34524, 5.34718, 5.34743 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T21:12:38Z\",\n \"avg_ns\": 179121414265,\n \"stddev_ns\": 3347368,\n \"avg_ts\": 2.858396,\n \"stddev_ts\": 0.000053,\n \"samples_ns\": [ 179125171782, 179120196229, 179118874785 ],\n \"samples_ts\": [ 2.85834, 2.85842, 2.85844 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T21:11:02Z", + "avg_ns": 23940373764, + "stddev_ns": 5355864, + "avg_ts": 5.346617, + "stddev_ts": 0.001196, + "samples_ns": [ + 23946524904, + 23937853079, + 23936743309 + ], + "samples_ts": [ + 5.34524, + 5.34718, + 5.34743 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T21:12:38Z", + "avg_ns": 179121414265, + "stddev_ns": 3347368, + "avg_ts": 2.858396, + "stddev_ts": 5.3e-05, + "samples_ns": [ + 179125171782, + 179120196229, + 179118874785 + ], + "samples_ts": [ + 2.85834, + 2.85842, + 2.85844 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 897 + }, + { + "timestamp_utc": "2025-12-09T21:30:20.835528+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T21:21:38Z\",\n \"avg_ns\": 97328095467,\n \"stddev_ns\": 2902088,\n \"avg_ts\": 5.260557,\n \"stddev_ts\": 0.000156,\n \"samples_ns\": [ 97331259187, 97327418013, 97325609202 ],\n \"samples_ts\": [ 5.26039, 5.26059, 5.26069 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T21:28:07Z\",\n \"avg_ns\": 44293454488,\n \"stddev_ns\": 14109627,\n \"avg_ts\": 2.889818,\n \"stddev_ts\": 0.000920,\n \"samples_ns\": [ 44309729914, 44285960559, 44284672991 ],\n \"samples_ts\": [ 2.88876, 2.89031, 2.89039 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T21:21:38Z", + "avg_ns": 97328095467, + "stddev_ns": 2902088, + "avg_ts": 5.260557, + "stddev_ts": 0.000156, + "samples_ns": [ + 97331259187, + 97327418013, + 97325609202 + ], + "samples_ts": [ + 5.26039, + 5.26059, + 5.26069 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T21:28:07Z", + "avg_ns": 44293454488, + "stddev_ns": 14109627, + "avg_ts": 2.889818, + "stddev_ts": 0.00092, + "samples_ns": [ + 44309729914, + 44285960559, + 44284672991 + ], + "samples_ts": [ + 2.88876, + 2.89031, + 2.89039 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 898 + }, + { + "timestamp_utc": "2025-12-09T21:45:52.823155+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T21:30:22Z\",\n \"avg_ns\": 97372233858,\n \"stddev_ns\": 1589473,\n \"avg_ts\": 5.258172,\n \"stddev_ts\": 0.000086,\n \"samples_ns\": [ 97371223255, 97374065974, 97371412345 ],\n \"samples_ts\": [ 5.25823, 5.25807, 5.25822 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T21:36:52Z\",\n \"avg_ns\": 179950405021,\n \"stddev_ns\": 15422490,\n \"avg_ts\": 2.845228,\n \"stddev_ts\": 0.000244,\n \"samples_ns\": [ 179968205347, 179941690743, 179941318974 ],\n \"samples_ts\": [ 2.84495, 2.84537, 2.84537 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T21:30:22Z", + "avg_ns": 97372233858, + "stddev_ns": 1589473, + "avg_ts": 5.258172, + "stddev_ts": 8.6e-05, + "samples_ns": [ + 97371223255, + 97374065974, + 97371412345 + ], + "samples_ts": [ + 5.25823, + 5.25807, + 5.25822 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T21:36:52Z", + "avg_ns": 179950405021, + "stddev_ns": 15422490, + "avg_ts": 2.845228, + "stddev_ts": 0.000244, + "samples_ns": [ + 179968205347, + 179941690743, + 179941318974 + ], + "samples_ts": [ + 2.84495, + 2.84537, + 2.84537 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 899 + }, + { + "timestamp_utc": "2025-12-09T21:48:08.409320+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T21:45:54Z\",\n \"avg_ns\": 12005013346,\n \"stddev_ns\": 14572170,\n \"avg_ts\": 10.662223,\n \"stddev_ts\": 0.012933,\n \"samples_ns\": [ 12021772343, 11997937756, 11995329939 ],\n \"samples_ts\": [ 10.6473, 10.6685, 10.6708 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T21:46:43Z\",\n \"avg_ns\": 28343237016,\n \"stddev_ns\": 9296189,\n \"avg_ts\": 4.516069,\n \"stddev_ts\": 0.001481,\n \"samples_ns\": [ 28353345487, 28341306067, 28335059495 ],\n \"samples_ts\": [ 4.51446, 4.51638, 4.51737 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T21:45:54Z", + "avg_ns": 12005013346, + "stddev_ns": 14572170, + "avg_ts": 10.662223, + "stddev_ts": 0.012933, + "samples_ns": [ + 12021772343, + 11997937756, + 11995329939 + ], + "samples_ts": [ + 10.6473, + 10.6685, + 10.6708 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T21:46:43Z", + "avg_ns": 28343237016, + "stddev_ns": 9296189, + "avg_ts": 4.516069, + "stddev_ts": 0.001481, + "samples_ns": [ + 28353345487, + 28341306067, + 28335059495 + ], + "samples_ts": [ + 4.51446, + 4.51638, + 4.51737 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 900 + }, + { + "timestamp_utc": "2025-12-09T21:54:43.331773+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T21:48:10Z\",\n \"avg_ns\": 12011192132,\n \"stddev_ns\": 18332375,\n \"avg_ts\": 10.656744,\n \"stddev_ts\": 0.016251,\n \"samples_ns\": [ 12032236435, 12002646079, 11998693884 ],\n \"samples_ts\": [ 10.6381, 10.6643, 10.6678 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T21:48:58Z\",\n \"avg_ns\": 114780582160,\n \"stddev_ns\": 2751269,\n \"avg_ts\": 4.460685,\n \"stddev_ts\": 0.000107,\n \"samples_ns\": [ 114779413877, 114778607821, 114783724782 ],\n \"samples_ts\": [ 4.46073, 4.46076, 4.46056 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T21:48:10Z", + "avg_ns": 12011192132, + "stddev_ns": 18332375, + "avg_ts": 10.656744, + "stddev_ts": 0.016251, + "samples_ns": [ + 12032236435, + 12002646079, + 11998693884 + ], + "samples_ts": [ + 10.6381, + 10.6643, + 10.6678 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T21:48:58Z", + "avg_ns": 114780582160, + "stddev_ns": 2751269, + "avg_ts": 4.460685, + "stddev_ts": 0.000107, + "samples_ns": [ + 114779413877, + 114778607821, + 114783724782 + ], + "samples_ts": [ + 4.46073, + 4.46076, + 4.46056 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 901 + }, + { + "timestamp_utc": "2025-12-09T21:59:28.607858+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T21:54:45Z\",\n \"avg_ns\": 49122107064,\n \"stddev_ns\": 2092172,\n \"avg_ts\": 10.423006,\n \"stddev_ts\": 0.000441,\n \"samples_ns\": [ 49120025757, 49122108881, 49124186555 ],\n \"samples_ts\": [ 10.4234, 10.423, 10.4226 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T21:58:01Z\",\n \"avg_ns\": 28762703254,\n \"stddev_ns\": 9047316,\n \"avg_ts\": 4.450208,\n \"stddev_ts\": 0.001400,\n \"samples_ns\": [ 28767182484, 28768633633, 28752293647 ],\n \"samples_ts\": [ 4.44951, 4.44929, 4.45182 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T21:54:45Z", + "avg_ns": 49122107064, + "stddev_ns": 2092172, + "avg_ts": 10.423006, + "stddev_ts": 0.000441, + "samples_ns": [ + 49120025757, + 49122108881, + 49124186555 + ], + "samples_ts": [ + 10.4234, + 10.423, + 10.4226 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T21:58:01Z", + "avg_ns": 28762703254, + "stddev_ns": 9047316, + "avg_ts": 4.450208, + "stddev_ts": 0.0014, + "samples_ns": [ + 28767182484, + 28768633633, + 28752293647 + ], + "samples_ts": [ + 4.44951, + 4.44929, + 4.45182 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 902 + }, + { + "timestamp_utc": "2025-12-09T22:08:30.360020+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T21:59:30Z\",\n \"avg_ns\": 48407617935,\n \"stddev_ns\": 265583626,\n \"avg_ts\": 10.577059,\n \"stddev_ts\": 0.057847,\n \"samples_ns\": [ 48714287486, 48254233171, 48254333148 ],\n \"samples_ts\": [ 10.5103, 10.6105, 10.6104 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T22:02:44Z\",\n \"avg_ns\": 115095824750,\n \"stddev_ns\": 19145119,\n \"avg_ts\": 4.448467,\n \"stddev_ts\": 0.000740,\n \"samples_ns\": [ 115093445197, 115116042368, 115077986687 ],\n \"samples_ts\": [ 4.44856, 4.44769, 4.44916 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T21:59:30Z", + "avg_ns": 48407617935, + "stddev_ns": 265583626, + "avg_ts": 10.577059, + "stddev_ts": 0.057847, + "samples_ns": [ + 48714287486, + 48254233171, + 48254333148 + ], + "samples_ts": [ + 10.5103, + 10.6105, + 10.6104 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T22:02:44Z", + "avg_ns": 115095824750, + "stddev_ns": 19145119, + "avg_ts": 4.448467, + "stddev_ts": 0.00074, + "samples_ns": [ + 115093445197, + 115116042368, + 115077986687 + ], + "samples_ts": [ + 4.44856, + 4.44769, + 4.44916 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 903 + }, + { + "timestamp_utc": "2025-12-09T22:10:45.851648+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T22:08:32Z\",\n \"avg_ns\": 11995775124,\n \"stddev_ns\": 1581563,\n \"avg_ts\": 10.670424,\n \"stddev_ts\": 0.001404,\n \"samples_ns\": [ 11996383813, 11993983678, 11996957882 ],\n \"samples_ts\": [ 10.6699, 10.672, 10.6694 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T22:09:20Z\",\n \"avg_ns\": 28339610839,\n \"stddev_ns\": 17953258,\n \"avg_ts\": 4.516648,\n \"stddev_ts\": 0.002860,\n \"samples_ns\": [ 28360333336, 28328746526, 28329752655 ],\n \"samples_ts\": [ 4.51335, 4.51838, 4.51822 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T22:08:32Z", + "avg_ns": 11995775124, + "stddev_ns": 1581563, + "avg_ts": 10.670424, + "stddev_ts": 0.001404, + "samples_ns": [ + 11996383813, + 11993983678, + 11996957882 + ], + "samples_ts": [ + 10.6699, + 10.672, + 10.6694 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T22:09:20Z", + "avg_ns": 28339610839, + "stddev_ns": 17953258, + "avg_ts": 4.516648, + "stddev_ts": 0.00286, + "samples_ns": [ + 28360333336, + 28328746526, + 28329752655 + ], + "samples_ts": [ + 4.51335, + 4.51838, + 4.51822 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 904 + }, + { + "timestamp_utc": "2025-12-09T22:17:21.299714+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T22:10:47Z\",\n \"avg_ns\": 11998754162,\n \"stddev_ns\": 5103748,\n \"avg_ts\": 10.667775,\n \"stddev_ts\": 0.004537,\n \"samples_ns\": [ 11998318019, 12004061986, 11993882481 ],\n \"samples_ts\": [ 10.6682, 10.6631, 10.6721 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T22:11:36Z\",\n \"avg_ns\": 114980399388,\n \"stddev_ns\": 8182692,\n \"avg_ts\": 4.452933,\n \"stddev_ts\": 0.000317,\n \"samples_ns\": [ 114978132505, 114989469291, 114973596369 ],\n \"samples_ts\": [ 4.45302, 4.45258, 4.4532 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T22:10:47Z", + "avg_ns": 11998754162, + "stddev_ns": 5103748, + "avg_ts": 10.667775, + "stddev_ts": 0.004537, + "samples_ns": [ + 11998318019, + 12004061986, + 11993882481 + ], + "samples_ts": [ + 10.6682, + 10.6631, + 10.6721 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T22:11:36Z", + "avg_ns": 114980399388, + "stddev_ns": 8182692, + "avg_ts": 4.452933, + "stddev_ts": 0.000317, + "samples_ns": [ + 114978132505, + 114989469291, + 114973596369 + ], + "samples_ts": [ + 4.45302, + 4.45258, + 4.4532 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 905 + }, + { + "timestamp_utc": "2025-12-09T22:22:02.513619+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T22:17:23Z\",\n \"avg_ns\": 48358580854,\n \"stddev_ns\": 1099101,\n \"avg_ts\": 10.587573,\n \"stddev_ts\": 0.000241,\n \"samples_ns\": [ 48357395725, 48359566643, 48358780194 ],\n \"samples_ts\": [ 10.5878, 10.5874, 10.5875 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T22:20:36Z\",\n \"avg_ns\": 28415863293,\n \"stddev_ns\": 15495783,\n \"avg_ts\": 4.504527,\n \"stddev_ts\": 0.002456,\n \"samples_ns\": [ 28433574147, 28404808970, 28409206763 ],\n \"samples_ts\": [ 4.50172, 4.50628, 4.50558 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T22:17:23Z", + "avg_ns": 48358580854, + "stddev_ns": 1099101, + "avg_ts": 10.587573, + "stddev_ts": 0.000241, + "samples_ns": [ + 48357395725, + 48359566643, + 48358780194 + ], + "samples_ts": [ + 10.5878, + 10.5874, + 10.5875 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T22:20:36Z", + "avg_ns": 28415863293, + "stddev_ns": 15495783, + "avg_ts": 4.504527, + "stddev_ts": 0.002456, + "samples_ns": [ + 28433574147, + 28404808970, + 28409206763 + ], + "samples_ts": [ + 4.50172, + 4.50628, + 4.50558 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 906 + }, + { + "timestamp_utc": "2025-12-09T22:31:03.116622+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T22:22:04Z\",\n \"avg_ns\": 48372975254,\n \"stddev_ns\": 1489007,\n \"avg_ts\": 10.584422,\n \"stddev_ts\": 0.000326,\n \"samples_ns\": [ 48373165284, 48371400354, 48374360124 ],\n \"samples_ts\": [ 10.5844, 10.5848, 10.5841 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T22:25:18Z\",\n \"avg_ns\": 114854959082,\n \"stddev_ns\": 77351896,\n \"avg_ts\": 4.457798,\n \"stddev_ts\": 0.003001,\n \"samples_ns\": [ 114944049381, 114815926656, 114804901210 ],\n \"samples_ts\": [ 4.45434, 4.45931, 4.45974 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T22:22:04Z", + "avg_ns": 48372975254, + "stddev_ns": 1489007, + "avg_ts": 10.584422, + "stddev_ts": 0.000326, + "samples_ns": [ + 48373165284, + 48371400354, + 48374360124 + ], + "samples_ts": [ + 10.5844, + 10.5848, + 10.5841 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T22:25:18Z", + "avg_ns": 114854959082, + "stddev_ns": 77351896, + "avg_ts": 4.457798, + "stddev_ts": 0.003001, + "samples_ns": [ + 114944049381, + 114815926656, + 114804901210 + ], + "samples_ts": [ + 4.45434, + 4.45931, + 4.45974 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 907 + }, + { + "timestamp_utc": "2025-12-09T22:33:18.648481+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T22:31:05Z\",\n \"avg_ns\": 12005019570,\n \"stddev_ns\": 11376126,\n \"avg_ts\": 10.662213,\n \"stddev_ts\": 0.010097,\n \"samples_ns\": [ 11997748221, 12018128252, 11999182239 ],\n \"samples_ts\": [ 10.6687, 10.6506, 10.6674 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T22:31:53Z\",\n \"avg_ns\": 28349310167,\n \"stddev_ns\": 7904312,\n \"avg_ts\": 4.515101,\n \"stddev_ts\": 0.001258,\n \"samples_ns\": [ 28358212997, 28343125397, 28346592108 ],\n \"samples_ts\": [ 4.51368, 4.51609, 4.51553 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T22:31:05Z", + "avg_ns": 12005019570, + "stddev_ns": 11376126, + "avg_ts": 10.662213, + "stddev_ts": 0.010097, + "samples_ns": [ + 11997748221, + 12018128252, + 11999182239 + ], + "samples_ts": [ + 10.6687, + 10.6506, + 10.6674 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T22:31:53Z", + "avg_ns": 28349310167, + "stddev_ns": 7904312, + "avg_ts": 4.515101, + "stddev_ts": 0.001258, + "samples_ns": [ + 28358212997, + 28343125397, + 28346592108 + ], + "samples_ts": [ + 4.51368, + 4.51609, + 4.51553 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 908 + }, + { + "timestamp_utc": "2025-12-09T22:39:54.859264+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T22:33:20Z\",\n \"avg_ns\": 12005322055,\n \"stddev_ns\": 10335070,\n \"avg_ts\": 10.661943,\n \"stddev_ts\": 0.009181,\n \"samples_ns\": [ 12008261292, 12013867942, 11993836933 ],\n \"samples_ts\": [ 10.6593, 10.6544, 10.6721 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T22:34:08Z\",\n \"avg_ns\": 115229283101,\n \"stddev_ns\": 33030550,\n \"avg_ts\": 4.443315,\n \"stddev_ts\": 0.001273,\n \"samples_ns\": [ 115263683040, 115197825853, 115226340412 ],\n \"samples_ts\": [ 4.44199, 4.44453, 4.44343 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T22:33:20Z", + "avg_ns": 12005322055, + "stddev_ns": 10335070, + "avg_ts": 10.661943, + "stddev_ts": 0.009181, + "samples_ns": [ + 12008261292, + 12013867942, + 11993836933 + ], + "samples_ts": [ + 10.6593, + 10.6544, + 10.6721 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T22:34:08Z", + "avg_ns": 115229283101, + "stddev_ns": 33030550, + "avg_ts": 4.443315, + "stddev_ts": 0.001273, + "samples_ns": [ + 115263683040, + 115197825853, + 115226340412 + ], + "samples_ts": [ + 4.44199, + 4.44453, + 4.44343 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 909 + }, + { + "timestamp_utc": "2025-12-09T22:44:38.550568+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T22:39:57Z\",\n \"avg_ns\": 49014850707,\n \"stddev_ns\": 4496270,\n \"avg_ts\": 10.445814,\n \"stddev_ts\": 0.000956,\n \"samples_ns\": [ 49009718356, 49016814681, 49018019086 ],\n \"samples_ts\": [ 10.4469, 10.4454, 10.4451 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T22:43:13Z\",\n \"avg_ns\": 28356443617,\n \"stddev_ns\": 16794906,\n \"avg_ts\": 4.513966,\n \"stddev_ts\": 0.002672,\n \"samples_ns\": [ 28375755600, 28348303435, 28345271818 ],\n \"samples_ts\": [ 4.51089, 4.51526, 4.51574 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T22:39:57Z", + "avg_ns": 49014850707, + "stddev_ns": 4496270, + "avg_ts": 10.445814, + "stddev_ts": 0.000956, + "samples_ns": [ + 49009718356, + 49016814681, + 49018019086 + ], + "samples_ts": [ + 10.4469, + 10.4454, + 10.4451 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T22:43:13Z", + "avg_ns": 28356443617, + "stddev_ns": 16794906, + "avg_ts": 4.513966, + "stddev_ts": 0.002672, + "samples_ns": [ + 28375755600, + 28348303435, + 28345271818 + ], + "samples_ts": [ + 4.51089, + 4.51526, + 4.51574 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 910 + }, + { + "timestamp_utc": "2025-12-09T22:53:41.945364+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T22:44:40Z\",\n \"avg_ns\": 49069651736,\n \"stddev_ns\": 935562,\n \"avg_ts\": 10.434148,\n \"stddev_ts\": 0.000199,\n \"samples_ns\": [ 49070618211, 49068750501, 49069586496 ],\n \"samples_ts\": [ 10.4339, 10.4343, 10.4342 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T22:47:57Z\",\n \"avg_ns\": 114844474614,\n \"stddev_ns\": 66355497,\n \"avg_ts\": 4.458204,\n \"stddev_ts\": 0.002575,\n \"samples_ns\": [ 114920956775, 114802273514, 114810193555 ],\n \"samples_ts\": [ 4.45524, 4.45984, 4.45953 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T22:44:40Z", + "avg_ns": 49069651736, + "stddev_ns": 935562, + "avg_ts": 10.434148, + "stddev_ts": 0.000199, + "samples_ns": [ + 49070618211, + 49068750501, + 49069586496 + ], + "samples_ts": [ + 10.4339, + 10.4343, + 10.4342 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T22:47:57Z", + "avg_ns": 114844474614, + "stddev_ns": 66355497, + "avg_ts": 4.458204, + "stddev_ts": 0.002575, + "samples_ns": [ + 114920956775, + 114802273514, + 114810193555 + ], + "samples_ts": [ + 4.45524, + 4.45984, + 4.45953 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 911 + }, + { + "timestamp_utc": "2025-12-09T22:55:58.404827+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T22:53:44Z\",\n \"avg_ns\": 12022089183,\n \"stddev_ns\": 22006635,\n \"avg_ts\": 10.647092,\n \"stddev_ts\": 0.019470,\n \"samples_ns\": [ 12047377794, 12011600018, 12007289738 ],\n \"samples_ts\": [ 10.6247, 10.6564, 10.6602 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T22:54:32Z\",\n \"avg_ns\": 28570351584,\n \"stddev_ns\": 5668389,\n \"avg_ts\": 4.480169,\n \"stddev_ts\": 0.000889,\n \"samples_ns\": [ 28569839725, 28576258543, 28564956484 ],\n \"samples_ts\": [ 4.48025, 4.47924, 4.48102 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T22:53:44Z", + "avg_ns": 12022089183, + "stddev_ns": 22006635, + "avg_ts": 10.647092, + "stddev_ts": 0.01947, + "samples_ns": [ + 12047377794, + 12011600018, + 12007289738 + ], + "samples_ts": [ + 10.6247, + 10.6564, + 10.6602 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T22:54:32Z", + "avg_ns": 28570351584, + "stddev_ns": 5668389, + "avg_ts": 4.480169, + "stddev_ts": 0.000889, + "samples_ns": [ + 28569839725, + 28576258543, + 28564956484 + ], + "samples_ts": [ + 4.48025, + 4.47924, + 4.48102 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 912 + }, + { + "timestamp_utc": "2025-12-09T23:02:33.031083+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T22:56:00Z\",\n \"avg_ns\": 11995983621,\n \"stddev_ns\": 3160088,\n \"avg_ts\": 10.670238,\n \"stddev_ts\": 0.002807,\n \"samples_ns\": [ 11999284854, 11992995633, 11995670378 ],\n \"samples_ts\": [ 10.6673, 10.6729, 10.6705 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T22:56:48Z\",\n \"avg_ns\": 114653884513,\n \"stddev_ns\": 9630220,\n \"avg_ts\": 4.465614,\n \"stddev_ts\": 0.000375,\n \"samples_ns\": [ 114664903643, 114647125716, 114649624181 ],\n \"samples_ts\": [ 4.46518, 4.46588, 4.46578 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T22:56:00Z", + "avg_ns": 11995983621, + "stddev_ns": 3160088, + "avg_ts": 10.670238, + "stddev_ts": 0.002807, + "samples_ns": [ + 11999284854, + 11992995633, + 11995670378 + ], + "samples_ts": [ + 10.6673, + 10.6729, + 10.6705 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T22:56:48Z", + "avg_ns": 114653884513, + "stddev_ns": 9630220, + "avg_ts": 4.465614, + "stddev_ts": 0.000375, + "samples_ns": [ + 114664903643, + 114647125716, + 114649624181 + ], + "samples_ts": [ + 4.46518, + 4.46588, + 4.46578 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 913 + }, + { + "timestamp_utc": "2025-12-09T23:07:13.488679+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T23:02:35Z\",\n \"avg_ns\": 48231199345,\n \"stddev_ns\": 3058216,\n \"avg_ts\": 10.615535,\n \"stddev_ts\": 0.000670,\n \"samples_ns\": [ 48234489874, 48228488462, 48230619701 ],\n \"samples_ts\": [ 10.6148, 10.6161, 10.6157 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T23:05:48Z\",\n \"avg_ns\": 28302473770,\n \"stddev_ns\": 11589990,\n \"avg_ts\": 4.522573,\n \"stddev_ts\": 0.001852,\n \"samples_ns\": [ 28314473257, 28301600481, 28291347574 ],\n \"samples_ts\": [ 4.52066, 4.52271, 4.52435 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T23:02:35Z", + "avg_ns": 48231199345, + "stddev_ns": 3058216, + "avg_ts": 10.615535, + "stddev_ts": 0.00067, + "samples_ns": [ + 48234489874, + 48228488462, + 48230619701 + ], + "samples_ts": [ + 10.6148, + 10.6161, + 10.6157 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T23:05:48Z", + "avg_ns": 28302473770, + "stddev_ns": 11589990, + "avg_ts": 4.522573, + "stddev_ts": 0.001852, + "samples_ns": [ + 28314473257, + 28301600481, + 28291347574 + ], + "samples_ts": [ + 4.52066, + 4.52271, + 4.52435 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 914 + }, + { + "timestamp_utc": "2025-12-09T23:16:13.276899+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T23:07:15Z\",\n \"avg_ns\": 48258999868,\n \"stddev_ns\": 8898116,\n \"avg_ts\": 10.609420,\n \"stddev_ts\": 0.001956,\n \"samples_ns\": [ 48269227880, 48254732801, 48253038923 ],\n \"samples_ts\": [ 10.6072, 10.6104, 10.6107 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T23:10:28Z\",\n \"avg_ns\": 114729516204,\n \"stddev_ns\": 17541889,\n \"avg_ts\": 4.462670,\n \"stddev_ts\": 0.000682,\n \"samples_ns\": [ 114748136990, 114727109985, 114713301637 ],\n \"samples_ts\": [ 4.46195, 4.46276, 4.4633 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T23:07:15Z", + "avg_ns": 48258999868, + "stddev_ns": 8898116, + "avg_ts": 10.60942, + "stddev_ts": 0.001956, + "samples_ns": [ + 48269227880, + 48254732801, + 48253038923 + ], + "samples_ts": [ + 10.6072, + 10.6104, + 10.6107 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T23:10:28Z", + "avg_ns": 114729516204, + "stddev_ns": 17541889, + "avg_ts": 4.46267, + "stddev_ts": 0.000682, + "samples_ns": [ + 114748136990, + 114727109985, + 114713301637 + ], + "samples_ts": [ + 4.46195, + 4.46276, + 4.4633 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 915 + }, + { + "timestamp_utc": "2025-12-09T23:18:28.765837+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T23:16:15Z\",\n \"avg_ns\": 11997851062,\n \"stddev_ns\": 6288462,\n \"avg_ts\": 10.668579,\n \"stddev_ts\": 0.005590,\n \"samples_ns\": [ 11997075070, 11991987567, 12004490550 ],\n \"samples_ts\": [ 10.6693, 10.6738, 10.6627 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T23:17:03Z\",\n \"avg_ns\": 28334418958,\n \"stddev_ns\": 4007620,\n \"avg_ts\": 4.517474,\n \"stddev_ts\": 0.000638,\n \"samples_ns\": [ 28334968572, 28330168459, 28338119844 ],\n \"samples_ts\": [ 4.51739, 4.51815, 4.51688 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T23:16:15Z", + "avg_ns": 11997851062, + "stddev_ns": 6288462, + "avg_ts": 10.668579, + "stddev_ts": 0.00559, + "samples_ns": [ + 11997075070, + 11991987567, + 12004490550 + ], + "samples_ts": [ + 10.6693, + 10.6738, + 10.6627 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T23:17:03Z", + "avg_ns": 28334418958, + "stddev_ns": 4007620, + "avg_ts": 4.517474, + "stddev_ts": 0.000638, + "samples_ns": [ + 28334968572, + 28330168459, + 28338119844 + ], + "samples_ts": [ + 4.51739, + 4.51815, + 4.51688 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 916 + }, + { + "timestamp_utc": "2025-12-09T23:25:03.358982+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T23:18:30Z\",\n \"avg_ns\": 12005656915,\n \"stddev_ns\": 4955498,\n \"avg_ts\": 10.661642,\n \"stddev_ts\": 0.004400,\n \"samples_ns\": [ 12003214342, 12011359534, 12002396869 ],\n \"samples_ts\": [ 10.6638, 10.6566, 10.6645 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T23:19:18Z\",\n \"avg_ns\": 114673407976,\n \"stddev_ns\": 85611917,\n \"avg_ts\": 4.464855,\n \"stddev_ts\": 0.003332,\n \"samples_ns\": [ 114631903421, 114616459357, 114771861150 ],\n \"samples_ts\": [ 4.46647, 4.46707, 4.46102 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T23:18:30Z", + "avg_ns": 12005656915, + "stddev_ns": 4955498, + "avg_ts": 10.661642, + "stddev_ts": 0.0044, + "samples_ns": [ + 12003214342, + 12011359534, + 12002396869 + ], + "samples_ts": [ + 10.6638, + 10.6566, + 10.6645 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T23:19:18Z", + "avg_ns": 114673407976, + "stddev_ns": 85611917, + "avg_ts": 4.464855, + "stddev_ts": 0.003332, + "samples_ns": [ + 114631903421, + 114616459357, + 114771861150 + ], + "samples_ts": [ + 4.46647, + 4.46707, + 4.46102 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 917 + }, + { + "timestamp_utc": "2025-12-09T23:29:44.448054+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T23:25:05Z\",\n \"avg_ns\": 48376295380,\n \"stddev_ns\": 1580716,\n \"avg_ts\": 10.583696,\n \"stddev_ts\": 0.000346,\n \"samples_ns\": [ 48378064422, 48375800132, 48375021586 ],\n \"samples_ts\": [ 10.5833, 10.5838, 10.584 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T23:28:19Z\",\n \"avg_ns\": 28342997982,\n \"stddev_ns\": 19049752,\n \"avg_ts\": 4.516108,\n \"stddev_ts\": 0.003034,\n \"samples_ns\": [ 28364809212, 28334549822, 28329634914 ],\n \"samples_ts\": [ 4.51263, 4.51745, 4.51824 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T23:25:05Z", + "avg_ns": 48376295380, + "stddev_ns": 1580716, + "avg_ts": 10.583696, + "stddev_ts": 0.000346, + "samples_ns": [ + 48378064422, + 48375800132, + 48375021586 + ], + "samples_ts": [ + 10.5833, + 10.5838, + 10.584 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T23:28:19Z", + "avg_ns": 28342997982, + "stddev_ns": 19049752, + "avg_ts": 4.516108, + "stddev_ts": 0.003034, + "samples_ns": [ + 28364809212, + 28334549822, + 28329634914 + ], + "samples_ts": [ + 4.51263, + 4.51745, + 4.51824 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 918 + }, + { + "timestamp_utc": "2025-12-09T23:38:52.295726+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T23:29:46Z\",\n \"avg_ns\": 49173204615,\n \"stddev_ns\": 4867625,\n \"avg_ts\": 10.412175,\n \"stddev_ts\": 0.001031,\n \"samples_ns\": [ 49178557154, 49172013783, 49169042908 ],\n \"samples_ts\": [ 10.411, 10.4124, 10.4131 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T23:33:03Z\",\n \"avg_ns\": 116296343628,\n \"stddev_ns\": 738068183,\n \"avg_ts\": 4.402664,\n \"stddev_ts\": 0.027935,\n \"samples_ns\": [ 117043297271, 116278239455, 115567494159 ],\n \"samples_ts\": [ 4.37445, 4.40323, 4.43031 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T23:29:46Z", + "avg_ns": 49173204615, + "stddev_ns": 4867625, + "avg_ts": 10.412175, + "stddev_ts": 0.001031, + "samples_ns": [ + 49178557154, + 49172013783, + 49169042908 + ], + "samples_ts": [ + 10.411, + 10.4124, + 10.4131 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T23:33:03Z", + "avg_ns": 116296343628, + "stddev_ns": 738068183, + "avg_ts": 4.402664, + "stddev_ts": 0.027935, + "samples_ns": [ + 117043297271, + 116278239455, + 115567494159 + ], + "samples_ts": [ + 4.37445, + 4.40323, + 4.43031 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 919 + }, + { + "timestamp_utc": "2025-12-09T23:41:07.873639+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T23:38:54Z\",\n \"avg_ns\": 12001729814,\n \"stddev_ns\": 12186076,\n \"avg_ts\": 10.665137,\n \"stddev_ts\": 0.010822,\n \"samples_ns\": [ 12015762997, 11995595722, 11993830725 ],\n \"samples_ts\": [ 10.6527, 10.6706, 10.6722 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T23:39:42Z\",\n \"avg_ns\": 28364675275,\n \"stddev_ns\": 3071581,\n \"avg_ts\": 4.512655,\n \"stddev_ts\": 0.000487,\n \"samples_ns\": [ 28361162214, 28366082975, 28366780638 ],\n \"samples_ts\": [ 4.51321, 4.51243, 4.51232 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T23:38:54Z", + "avg_ns": 12001729814, + "stddev_ns": 12186076, + "avg_ts": 10.665137, + "stddev_ts": 0.010822, + "samples_ns": [ + 12015762997, + 11995595722, + 11993830725 + ], + "samples_ts": [ + 10.6527, + 10.6706, + 10.6722 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T23:39:42Z", + "avg_ns": 28364675275, + "stddev_ns": 3071581, + "avg_ts": 4.512655, + "stddev_ts": 0.000487, + "samples_ns": [ + 28361162214, + 28366082975, + 28366780638 + ], + "samples_ts": [ + 4.51321, + 4.51243, + 4.51232 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 920 + }, + { + "timestamp_utc": "2025-12-09T23:47:43.142066+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T23:41:10Z\",\n \"avg_ns\": 12050426461,\n \"stddev_ns\": 77259518,\n \"avg_ts\": 10.622321,\n \"stddev_ts\": 0.067876,\n \"samples_ns\": [ 12138711111, 12017392089, 11995176184 ],\n \"samples_ts\": [ 10.5448, 10.6512, 10.671 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T23:41:58Z\",\n \"avg_ns\": 114799241407,\n \"stddev_ns\": 18541510,\n \"avg_ts\": 4.459960,\n \"stddev_ts\": 0.000720,\n \"samples_ns\": [ 114818940631, 114782137262, 114796646329 ],\n \"samples_ts\": [ 4.45919, 4.46062, 4.46006 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T23:41:10Z", + "avg_ns": 12050426461, + "stddev_ns": 77259518, + "avg_ts": 10.622321, + "stddev_ts": 0.067876, + "samples_ns": [ + 12138711111, + 12017392089, + 11995176184 + ], + "samples_ts": [ + 10.5448, + 10.6512, + 10.671 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T23:41:58Z", + "avg_ns": 114799241407, + "stddev_ns": 18541510, + "avg_ts": 4.45996, + "stddev_ts": 0.00072, + "samples_ns": [ + 114818940631, + 114782137262, + 114796646329 + ], + "samples_ts": [ + 4.45919, + 4.46062, + 4.46006 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 921 + }, + { + "timestamp_utc": "2025-12-09T23:52:26.790004+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T23:47:45Z\",\n \"avg_ns\": 49033924976,\n \"stddev_ns\": 1394967,\n \"avg_ts\": 10.441750,\n \"stddev_ts\": 0.000289,\n \"samples_ns\": [ 49035476085, 49033357785, 49032941060 ],\n \"samples_ts\": [ 10.4414, 10.4419, 10.442 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T23:51:01Z\",\n \"avg_ns\": 28320462605,\n \"stddev_ns\": 10911613,\n \"avg_ts\": 4.519701,\n \"stddev_ts\": 0.001741,\n \"samples_ns\": [ 28329616565, 28323381542, 28308389709 ],\n \"samples_ts\": [ 4.51824, 4.51923, 4.52163 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T23:47:45Z", + "avg_ns": 49033924976, + "stddev_ns": 1394967, + "avg_ts": 10.44175, + "stddev_ts": 0.000289, + "samples_ns": [ + 49035476085, + 49033357785, + 49032941060 + ], + "samples_ts": [ + 10.4414, + 10.4419, + 10.442 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-09T23:51:01Z", + "avg_ns": 28320462605, + "stddev_ns": 10911613, + "avg_ts": 4.519701, + "stddev_ts": 0.001741, + "samples_ns": [ + 28329616565, + 28323381542, + 28308389709 + ], + "samples_ts": [ + 4.51824, + 4.51923, + 4.52163 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 922 + }, + { + "timestamp_utc": "2025-12-10T00:01:30.607021+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T23:52:28Z\",\n \"avg_ns\": 49045981019,\n \"stddev_ns\": 4093360,\n \"avg_ts\": 10.439184,\n \"stddev_ts\": 0.000871,\n \"samples_ns\": [ 49050698662, 49043370243, 49043874152 ],\n \"samples_ts\": [ 10.4382, 10.4397, 10.4396 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-09T23:55:45Z\",\n \"avg_ns\": 115036487136,\n \"stddev_ns\": 41351525,\n \"avg_ts\": 4.450762,\n \"stddev_ts\": 0.001600,\n \"samples_ns\": [ 115025145504, 115001991310, 115082324595 ],\n \"samples_ts\": [ 4.4512, 4.4521, 4.44899 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-09T23:52:28Z", + "avg_ns": 49045981019, + "stddev_ns": 4093360, + "avg_ts": 10.439184, + "stddev_ts": 0.000871, + "samples_ns": [ + 49050698662, + 49043370243, + 49043874152 + ], + "samples_ts": [ + 10.4382, + 10.4397, + 10.4396 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-09T23:55:45Z", + "avg_ns": 115036487136, + "stddev_ns": 41351525, + "avg_ts": 4.450762, + "stddev_ts": 0.0016, + "samples_ns": [ + 115025145504, + 115001991310, + 115082324595 + ], + "samples_ts": [ + 4.4512, + 4.4521, + 4.44899 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 923 + }, + { + "timestamp_utc": "2025-12-10T00:03:46.428713+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T00:01:32Z\",\n \"avg_ns\": 11999442018,\n \"stddev_ns\": 6111238,\n \"avg_ts\": 10.667165,\n \"stddev_ts\": 0.005430,\n \"samples_ns\": [ 12006482345, 11996322586, 11995521124 ],\n \"samples_ts\": [ 10.6609, 10.6699, 10.6706 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T00:02:20Z\",\n \"avg_ns\": 28435486723,\n \"stddev_ns\": 4054607,\n \"avg_ts\": 4.501418,\n \"stddev_ts\": 0.000641,\n \"samples_ns\": [ 28439367805, 28431290994, 28435801372 ],\n \"samples_ts\": [ 4.5008, 4.50208, 4.50137 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T00:01:32Z", + "avg_ns": 11999442018, + "stddev_ns": 6111238, + "avg_ts": 10.667165, + "stddev_ts": 0.00543, + "samples_ns": [ + 12006482345, + 11996322586, + 11995521124 + ], + "samples_ts": [ + 10.6609, + 10.6699, + 10.6706 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T00:02:20Z", + "avg_ns": 28435486723, + "stddev_ns": 4054607, + "avg_ts": 4.501418, + "stddev_ts": 0.000641, + "samples_ns": [ + 28439367805, + 28431290994, + 28435801372 + ], + "samples_ts": [ + 4.5008, + 4.50208, + 4.50137 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 924 + }, + { + "timestamp_utc": "2025-12-10T00:10:21.260831+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T00:03:48Z\",\n \"avg_ns\": 12001118051,\n \"stddev_ns\": 2232229,\n \"avg_ts\": 10.665673,\n \"stddev_ts\": 0.001979,\n \"samples_ns\": [ 12003328108, 11998874787, 12001151260 ],\n \"samples_ts\": [ 10.6637, 10.6677, 10.6656 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T00:04:36Z\",\n \"avg_ns\": 114770574511,\n \"stddev_ns\": 2100250,\n \"avg_ts\": 4.461074,\n \"stddev_ts\": 0.000081,\n \"samples_ns\": [ 114769015159, 114772926637, 114769781738 ],\n \"samples_ts\": [ 4.46113, 4.46098, 4.4611 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T00:03:48Z", + "avg_ns": 12001118051, + "stddev_ns": 2232229, + "avg_ts": 10.665673, + "stddev_ts": 0.001979, + "samples_ns": [ + 12003328108, + 11998874787, + 12001151260 + ], + "samples_ts": [ + 10.6637, + 10.6677, + 10.6656 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T00:04:36Z", + "avg_ns": 114770574511, + "stddev_ns": 2100250, + "avg_ts": 4.461074, + "stddev_ts": 8.1e-05, + "samples_ns": [ + 114769015159, + 114772926637, + 114769781738 + ], + "samples_ts": [ + 4.46113, + 4.46098, + 4.4611 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 925 + }, + { + "timestamp_utc": "2025-12-10T00:15:01.976568+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T00:10:23Z\",\n \"avg_ns\": 48262491230,\n \"stddev_ns\": 1001117,\n \"avg_ts\": 10.608653,\n \"stddev_ts\": 0.000215,\n \"samples_ns\": [ 48263064342, 48263045883, 48261363466 ],\n \"samples_ts\": [ 10.6085, 10.6085, 10.6089 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T00:13:36Z\",\n \"avg_ns\": 28365800742,\n \"stddev_ns\": 15110078,\n \"avg_ts\": 4.512477,\n \"stddev_ts\": 0.002403,\n \"samples_ns\": [ 28382906864, 28354277569, 28360217794 ],\n \"samples_ts\": [ 4.50976, 4.51431, 4.51336 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T00:10:23Z", + "avg_ns": 48262491230, + "stddev_ns": 1001117, + "avg_ts": 10.608653, + "stddev_ts": 0.000215, + "samples_ns": [ + 48263064342, + 48263045883, + 48261363466 + ], + "samples_ts": [ + 10.6085, + 10.6085, + 10.6089 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T00:13:36Z", + "avg_ns": 28365800742, + "stddev_ns": 15110078, + "avg_ts": 4.512477, + "stddev_ts": 0.002403, + "samples_ns": [ + 28382906864, + 28354277569, + 28360217794 + ], + "samples_ts": [ + 4.50976, + 4.51431, + 4.51336 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 926 + }, + { + "timestamp_utc": "2025-12-10T00:24:03.540000+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T00:15:04Z\",\n \"avg_ns\": 48260162037,\n \"stddev_ns\": 757413,\n \"avg_ts\": 10.609165,\n \"stddev_ts\": 0.000167,\n \"samples_ns\": [ 48261036472, 48259710741, 48259738898 ],\n \"samples_ts\": [ 10.609, 10.6093, 10.6093 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T00:18:17Z\",\n \"avg_ns\": 115327379158,\n \"stddev_ns\": 17204323,\n \"avg_ts\": 4.439536,\n \"stddev_ts\": 0.000662,\n \"samples_ns\": [ 115346655490, 115321901041, 115313580943 ],\n \"samples_ts\": [ 4.43879, 4.43975, 4.44007 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T00:15:04Z", + "avg_ns": 48260162037, + "stddev_ns": 757413, + "avg_ts": 10.609165, + "stddev_ts": 0.000167, + "samples_ns": [ + 48261036472, + 48259710741, + 48259738898 + ], + "samples_ts": [ + 10.609, + 10.6093, + 10.6093 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T00:18:17Z", + "avg_ns": 115327379158, + "stddev_ns": 17204323, + "avg_ts": 4.439536, + "stddev_ts": 0.000662, + "samples_ns": [ + 115346655490, + 115321901041, + 115313580943 + ], + "samples_ts": [ + 4.43879, + 4.43975, + 4.44007 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 927 + }, + { + "timestamp_utc": "2025-12-10T00:26:20.131796+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T00:24:05Z\",\n \"avg_ns\": 12218117511,\n \"stddev_ns\": 18932860,\n \"avg_ts\": 10.476262,\n \"stddev_ts\": 0.016220,\n \"samples_ns\": [ 12205281756, 12239861371, 12209209406 ],\n \"samples_ts\": [ 10.4873, 10.4576, 10.4839 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T00:24:54Z\",\n \"avg_ns\": 28410610216,\n \"stddev_ns\": 106673635,\n \"avg_ts\": 4.505401,\n \"stddev_ts\": 0.016881,\n \"samples_ns\": [ 28533209503, 28359620635, 28339000511 ],\n \"samples_ts\": [ 4.486, 4.51346, 4.51674 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T00:24:05Z", + "avg_ns": 12218117511, + "stddev_ns": 18932860, + "avg_ts": 10.476262, + "stddev_ts": 0.01622, + "samples_ns": [ + 12205281756, + 12239861371, + 12209209406 + ], + "samples_ts": [ + 10.4873, + 10.4576, + 10.4839 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T00:24:54Z", + "avg_ns": 28410610216, + "stddev_ns": 106673635, + "avg_ts": 4.505401, + "stddev_ts": 0.016881, + "samples_ns": [ + 28533209503, + 28359620635, + 28339000511 + ], + "samples_ts": [ + 4.486, + 4.51346, + 4.51674 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 928 + }, + { + "timestamp_utc": "2025-12-10T00:32:54.154388+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T00:26:22Z\",\n \"avg_ns\": 11998031483,\n \"stddev_ns\": 14812309,\n \"avg_ts\": 10.668428,\n \"stddev_ts\": 0.013161,\n \"samples_ns\": [ 11989682402, 12015133210, 11989278838 ],\n \"samples_ts\": [ 10.6758, 10.6532, 10.6762 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T00:27:10Z\",\n \"avg_ns\": 114527603968,\n \"stddev_ns\": 23808188,\n \"avg_ts\": 4.470538,\n \"stddev_ts\": 0.000929,\n \"samples_ns\": [ 114554471208, 114509126203, 114519214493 ],\n \"samples_ts\": [ 4.46949, 4.47126, 4.47087 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T00:26:22Z", + "avg_ns": 11998031483, + "stddev_ns": 14812309, + "avg_ts": 10.668428, + "stddev_ts": 0.013161, + "samples_ns": [ + 11989682402, + 12015133210, + 11989278838 + ], + "samples_ts": [ + 10.6758, + 10.6532, + 10.6762 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T00:27:10Z", + "avg_ns": 114527603968, + "stddev_ns": 23808188, + "avg_ts": 4.470538, + "stddev_ts": 0.000929, + "samples_ns": [ + 114554471208, + 114509126203, + 114519214493 + ], + "samples_ts": [ + 4.46949, + 4.47126, + 4.47087 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 929 + }, + { + "timestamp_utc": "2025-12-10T00:37:35.711816+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T00:32:56Z\",\n \"avg_ns\": 48352131372,\n \"stddev_ns\": 3980170,\n \"avg_ts\": 10.588985,\n \"stddev_ts\": 0.000870,\n \"samples_ns\": [ 48356360793, 48351558545, 48348474779 ],\n \"samples_ts\": [ 10.5881, 10.5891, 10.5898 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T00:36:10Z\",\n \"avg_ns\": 28385723440,\n \"stddev_ns\": 18363851,\n \"avg_ts\": 4.509310,\n \"stddev_ts\": 0.002916,\n \"samples_ns\": [ 28406896526, 28376111384, 28374162412 ],\n \"samples_ts\": [ 4.50595, 4.51084, 4.51115 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T00:32:56Z", + "avg_ns": 48352131372, + "stddev_ns": 3980170, + "avg_ts": 10.588985, + "stddev_ts": 0.00087, + "samples_ns": [ + 48356360793, + 48351558545, + 48348474779 + ], + "samples_ts": [ + 10.5881, + 10.5891, + 10.5898 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T00:36:10Z", + "avg_ns": 28385723440, + "stddev_ns": 18363851, + "avg_ts": 4.50931, + "stddev_ts": 0.002916, + "samples_ns": [ + 28406896526, + 28376111384, + 28374162412 + ], + "samples_ts": [ + 4.50595, + 4.51084, + 4.51115 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 930 + }, + { + "timestamp_utc": "2025-12-10T00:46:40.310540+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T00:37:37Z\",\n \"avg_ns\": 49274862630,\n \"stddev_ns\": 3176562,\n \"avg_ts\": 10.390694,\n \"stddev_ts\": 0.000668,\n \"samples_ns\": [ 49272482733, 49273645643, 49278459515 ],\n \"samples_ts\": [ 10.3912, 10.391, 10.3899 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T00:40:54Z\",\n \"avg_ns\": 115007628479,\n \"stddev_ns\": 694067644,\n \"avg_ts\": 4.451986,\n \"stddev_ts\": 0.026774,\n \"samples_ns\": [ 115809067874, 114607884930, 114605932634 ],\n \"samples_ts\": [ 4.42107, 4.46741, 4.46748 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T00:37:37Z", + "avg_ns": 49274862630, + "stddev_ns": 3176562, + "avg_ts": 10.390694, + "stddev_ts": 0.000668, + "samples_ns": [ + 49272482733, + 49273645643, + 49278459515 + ], + "samples_ts": [ + 10.3912, + 10.391, + 10.3899 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T00:40:54Z", + "avg_ns": 115007628479, + "stddev_ns": 694067644, + "avg_ts": 4.451986, + "stddev_ts": 0.026774, + "samples_ns": [ + 115809067874, + 114607884930, + 114605932634 + ], + "samples_ts": [ + 4.42107, + 4.46741, + 4.46748 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 931 + }, + { + "timestamp_utc": "2025-12-10T00:48:56.130583+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T00:46:42Z\",\n \"avg_ns\": 12011532806,\n \"stddev_ns\": 15182408,\n \"avg_ts\": 10.656436,\n \"stddev_ts\": 0.013469,\n \"samples_ns\": [ 12026828921, 12011302779, 11996466718 ],\n \"samples_ts\": [ 10.6429, 10.6566, 10.6698 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T00:47:30Z\",\n \"avg_ns\": 28447613201,\n \"stddev_ns\": 4522437,\n \"avg_ts\": 4.499499,\n \"stddev_ts\": 0.000715,\n \"samples_ns\": [ 28450633917, 28449791867, 28442413819 ],\n \"samples_ts\": [ 4.49902, 4.49915, 4.50032 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T00:46:42Z", + "avg_ns": 12011532806, + "stddev_ns": 15182408, + "avg_ts": 10.656436, + "stddev_ts": 0.013469, + "samples_ns": [ + 12026828921, + 12011302779, + 11996466718 + ], + "samples_ts": [ + 10.6429, + 10.6566, + 10.6698 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T00:47:30Z", + "avg_ns": 28447613201, + "stddev_ns": 4522437, + "avg_ts": 4.499499, + "stddev_ts": 0.000715, + "samples_ns": [ + 28450633917, + 28449791867, + 28442413819 + ], + "samples_ts": [ + 4.49902, + 4.49915, + 4.50032 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 932 + }, + { + "timestamp_utc": "2025-12-10T00:55:30.780974+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T00:48:58Z\",\n \"avg_ns\": 12010105292,\n \"stddev_ns\": 14984988,\n \"avg_ts\": 10.657703,\n \"stddev_ts\": 0.013289,\n \"samples_ns\": [ 12027157284, 12004118598, 11999039996 ],\n \"samples_ts\": [ 10.6426, 10.663, 10.6675 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T00:49:46Z\",\n \"avg_ns\": 114721011851,\n \"stddev_ns\": 12936237,\n \"avg_ts\": 4.463001,\n \"stddev_ts\": 0.000503,\n \"samples_ns\": [ 114735487211, 114710599352, 114716948991 ],\n \"samples_ts\": [ 4.46244, 4.46341, 4.46316 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T00:48:58Z", + "avg_ns": 12010105292, + "stddev_ns": 14984988, + "avg_ts": 10.657703, + "stddev_ts": 0.013289, + "samples_ns": [ + 12027157284, + 12004118598, + 11999039996 + ], + "samples_ts": [ + 10.6426, + 10.663, + 10.6675 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T00:49:46Z", + "avg_ns": 114721011851, + "stddev_ns": 12936237, + "avg_ts": 4.463001, + "stddev_ts": 0.000503, + "samples_ns": [ + 114735487211, + 114710599352, + 114716948991 + ], + "samples_ts": [ + 4.46244, + 4.46341, + 4.46316 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 933 + }, + { + "timestamp_utc": "2025-12-10T01:00:14.647885+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T00:55:32Z\",\n \"avg_ns\": 49039966200,\n \"stddev_ns\": 5258131,\n \"avg_ts\": 10.440464,\n \"stddev_ts\": 0.001118,\n \"samples_ns\": [ 49034339185, 49044742215, 49040817201 ],\n \"samples_ts\": [ 10.4417, 10.4394, 10.4403 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T00:58:49Z\",\n \"avg_ns\": 28406838404,\n \"stddev_ns\": 14357919,\n \"avg_ts\": 4.505958,\n \"stddev_ts\": 0.002277,\n \"samples_ns\": [ 28423315658, 28397019735, 28400179820 ],\n \"samples_ts\": [ 4.50335, 4.50752, 4.50701 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T00:55:32Z", + "avg_ns": 49039966200, + "stddev_ns": 5258131, + "avg_ts": 10.440464, + "stddev_ts": 0.001118, + "samples_ns": [ + 49034339185, + 49044742215, + 49040817201 + ], + "samples_ts": [ + 10.4417, + 10.4394, + 10.4403 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T00:58:49Z", + "avg_ns": 28406838404, + "stddev_ns": 14357919, + "avg_ts": 4.505958, + "stddev_ts": 0.002277, + "samples_ns": [ + 28423315658, + 28397019735, + 28400179820 + ], + "samples_ts": [ + 4.50335, + 4.50752, + 4.50701 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 934 + }, + { + "timestamp_utc": "2025-12-10T01:09:19.231688+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T01:00:16Z\",\n \"avg_ns\": 49050561476,\n \"stddev_ns\": 5438776,\n \"avg_ts\": 10.438209,\n \"stddev_ts\": 0.001156,\n \"samples_ns\": [ 49056741340, 49048413992, 49046529097 ],\n \"samples_ts\": [ 10.4369, 10.4387, 10.4391 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T01:03:32Z\",\n \"avg_ns\": 115296800862,\n \"stddev_ns\": 33551977,\n \"avg_ts\": 4.440713,\n \"stddev_ts\": 0.001292,\n \"samples_ns\": [ 115335152980, 115282351425, 115272898183 ],\n \"samples_ts\": [ 4.43924, 4.44127, 4.44163 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T01:00:16Z", + "avg_ns": 49050561476, + "stddev_ns": 5438776, + "avg_ts": 10.438209, + "stddev_ts": 0.001156, + "samples_ns": [ + 49056741340, + 49048413992, + 49046529097 + ], + "samples_ts": [ + 10.4369, + 10.4387, + 10.4391 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T01:03:32Z", + "avg_ns": 115296800862, + "stddev_ns": 33551977, + "avg_ts": 4.440713, + "stddev_ts": 0.001292, + "samples_ns": [ + 115335152980, + 115282351425, + 115272898183 + ], + "samples_ts": [ + 4.43924, + 4.44127, + 4.44163 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 935 + }, + { + "timestamp_utc": "2025-12-10T01:11:16.209158+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T01:09:21Z\",\n \"avg_ns\": 8243615334,\n \"stddev_ns\": 7642664,\n \"avg_ts\": 15.527177,\n \"stddev_ts\": 0.014388,\n \"samples_ns\": [ 8252439936, 8239131472, 8239274594 ],\n \"samples_ts\": [ 15.5106, 15.5356, 15.5353 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T01:09:54Z\",\n \"avg_ns\": 27146194782,\n \"stddev_ns\": 16991014,\n \"avg_ts\": 4.715211,\n \"stddev_ts\": 0.002950,\n \"samples_ns\": [ 27165677290, 27138458141, 27134448915 ],\n \"samples_ts\": [ 4.71183, 4.71655, 4.71725 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T01:09:21Z", + "avg_ns": 8243615334, + "stddev_ns": 7642664, + "avg_ts": 15.527177, + "stddev_ts": 0.014388, + "samples_ns": [ + 8252439936, + 8239131472, + 8239274594 + ], + "samples_ts": [ + 15.5106, + 15.5356, + 15.5353 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T01:09:54Z", + "avg_ns": 27146194782, + "stddev_ns": 16991014, + "avg_ts": 4.715211, + "stddev_ts": 0.00295, + "samples_ns": [ + 27165677290, + 27138458141, + 27134448915 + ], + "samples_ts": [ + 4.71183, + 4.71655, + 4.71725 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 936 + }, + { + "timestamp_utc": "2025-12-10T01:17:20.312341+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T01:11:18Z\",\n \"avg_ns\": 8255094540,\n \"stddev_ns\": 8929131,\n \"avg_ts\": 15.505589,\n \"stddev_ts\": 0.016772,\n \"samples_ns\": [ 8255539281, 8263792528, 8245951812 ],\n \"samples_ts\": [ 15.5047, 15.4893, 15.5228 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T01:11:51Z\",\n \"avg_ns\": 109516225646,\n \"stddev_ns\": 52668053,\n \"avg_ts\": 4.675107,\n \"stddev_ts\": 0.002248,\n \"samples_ns\": [ 109575814794, 109475911353, 109496950792 ],\n \"samples_ts\": [ 4.67256, 4.67683, 4.67593 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T01:11:18Z", + "avg_ns": 8255094540, + "stddev_ns": 8929131, + "avg_ts": 15.505589, + "stddev_ts": 0.016772, + "samples_ns": [ + 8255539281, + 8263792528, + 8245951812 + ], + "samples_ts": [ + 15.5047, + 15.4893, + 15.5228 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T01:11:51Z", + "avg_ns": 109516225646, + "stddev_ns": 52668053, + "avg_ts": 4.675107, + "stddev_ts": 0.002248, + "samples_ns": [ + 109575814794, + 109475911353, + 109496950792 + ], + "samples_ts": [ + 4.67256, + 4.67683, + 4.67593 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 937 + }, + { + "timestamp_utc": "2025-12-10T01:20:58.064283+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T01:17:22Z\",\n \"avg_ns\": 33153884396,\n \"stddev_ns\": 3894125,\n \"avg_ts\": 15.443138,\n \"stddev_ts\": 0.001810,\n \"samples_ns\": [ 33157647942, 33149887276, 33154117972 ],\n \"samples_ts\": [ 15.4414, 15.445, 15.443 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T01:19:35Z\",\n \"avg_ns\": 27492852205,\n \"stddev_ns\": 387878449,\n \"avg_ts\": 4.656378,\n \"stddev_ts\": 0.066149,\n \"samples_ns\": [ 27053041303, 27639447070, 27786068243 ],\n \"samples_ts\": [ 4.73145, 4.63106, 4.60663 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T01:17:22Z", + "avg_ns": 33153884396, + "stddev_ns": 3894125, + "avg_ts": 15.443138, + "stddev_ts": 0.00181, + "samples_ns": [ + 33157647942, + 33149887276, + 33154117972 + ], + "samples_ts": [ + 15.4414, + 15.445, + 15.443 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T01:19:35Z", + "avg_ns": 27492852205, + "stddev_ns": 387878449, + "avg_ts": 4.656378, + "stddev_ts": 0.066149, + "samples_ns": [ + 27053041303, + 27639447070, + 27786068243 + ], + "samples_ts": [ + 4.73145, + 4.63106, + 4.60663 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 938 + }, + { + "timestamp_utc": "2025-12-10T01:28:49.830655+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T01:21:00Z\",\n \"avg_ns\": 33163251684,\n \"stddev_ns\": 6146620,\n \"avg_ts\": 15.438776,\n \"stddev_ts\": 0.002859,\n \"samples_ns\": [ 33159474839, 33159942397, 33170337818 ],\n \"samples_ts\": [ 15.4405, 15.4403, 15.4355 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T01:23:12Z\",\n \"avg_ns\": 112189341870,\n \"stddev_ns\": 721237894,\n \"avg_ts\": 4.563839,\n \"stddev_ts\": 0.029242,\n \"samples_ns\": [ 113012676987, 111669169285, 111886179340 ],\n \"samples_ts\": [ 4.53047, 4.58497, 4.57608 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T01:21:00Z", + "avg_ns": 33163251684, + "stddev_ns": 6146620, + "avg_ts": 15.438776, + "stddev_ts": 0.002859, + "samples_ns": [ + 33159474839, + 33159942397, + 33170337818 + ], + "samples_ts": [ + 15.4405, + 15.4403, + 15.4355 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T01:23:12Z", + "avg_ns": 112189341870, + "stddev_ns": 721237894, + "avg_ts": 4.563839, + "stddev_ts": 0.029242, + "samples_ns": [ + 113012676987, + 111669169285, + 111886179340 + ], + "samples_ts": [ + 4.53047, + 4.58497, + 4.57608 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 939 + }, + { + "timestamp_utc": "2025-12-10T01:30:47.290095+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T01:28:51Z\",\n \"avg_ns\": 8262937148,\n \"stddev_ns\": 28676176,\n \"avg_ts\": 15.490984,\n \"stddev_ts\": 0.053663,\n \"samples_ns\": [ 8295726203, 8250540310, 8242544931 ],\n \"samples_ts\": [ 15.4296, 15.5141, 15.5292 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T01:29:24Z\",\n \"avg_ns\": 27323723218,\n \"stddev_ns\": 128571583,\n \"avg_ts\": 4.684643,\n \"stddev_ts\": 0.022011,\n \"samples_ns\": [ 27211859836, 27464186178, 27295123642 ],\n \"samples_ts\": [ 4.70383, 4.66062, 4.68948 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T01:28:51Z", + "avg_ns": 8262937148, + "stddev_ns": 28676176, + "avg_ts": 15.490984, + "stddev_ts": 0.053663, + "samples_ns": [ + 8295726203, + 8250540310, + 8242544931 + ], + "samples_ts": [ + 15.4296, + 15.5141, + 15.5292 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T01:29:24Z", + "avg_ns": 27323723218, + "stddev_ns": 128571583, + "avg_ts": 4.684643, + "stddev_ts": 0.022011, + "samples_ns": [ + 27211859836, + 27464186178, + 27295123642 + ], + "samples_ts": [ + 4.70383, + 4.66062, + 4.68948 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 940 + }, + { + "timestamp_utc": "2025-12-10T01:36:54.051717+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T01:30:49Z\",\n \"avg_ns\": 8259212200,\n \"stddev_ns\": 19371662,\n \"avg_ts\": 15.497903,\n \"stddev_ts\": 0.036304,\n \"samples_ns\": [ 8245844393, 8250364278, 8281427929 ],\n \"samples_ts\": [ 15.523, 15.5145, 15.4563 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T01:31:22Z\",\n \"avg_ns\": 110420338630,\n \"stddev_ns\": 745397014,\n \"avg_ts\": 4.636967,\n \"stddev_ts\": 0.031213,\n \"samples_ns\": [ 109819192911, 111254376181, 110187446798 ],\n \"samples_ts\": [ 4.66221, 4.60207, 4.64663 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T01:30:49Z", + "avg_ns": 8259212200, + "stddev_ns": 19371662, + "avg_ts": 15.497903, + "stddev_ts": 0.036304, + "samples_ns": [ + 8245844393, + 8250364278, + 8281427929 + ], + "samples_ts": [ + 15.523, + 15.5145, + 15.4563 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T01:31:22Z", + "avg_ns": 110420338630, + "stddev_ns": 745397014, + "avg_ts": 4.636967, + "stddev_ts": 0.031213, + "samples_ns": [ + 109819192911, + 111254376181, + 110187446798 + ], + "samples_ts": [ + 4.66221, + 4.60207, + 4.64663 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 941 + }, + { + "timestamp_utc": "2025-12-10T01:40:31.036476+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T01:36:56Z\",\n \"avg_ns\": 33303277796,\n \"stddev_ns\": 3050453,\n \"avg_ts\": 15.373862,\n \"stddev_ts\": 0.001408,\n \"samples_ns\": [ 33306784324, 33301813432, 33301235632 ],\n \"samples_ts\": [ 15.3722, 15.3745, 15.3748 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T01:39:09Z\",\n \"avg_ns\": 27079333322,\n \"stddev_ns\": 13233351,\n \"avg_ts\": 4.726853,\n \"stddev_ts\": 0.002309,\n \"samples_ns\": [ 27094492166, 27073404521, 27070103281 ],\n \"samples_ts\": [ 4.72421, 4.72789, 4.72846 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T01:36:56Z", + "avg_ns": 33303277796, + "stddev_ns": 3050453, + "avg_ts": 15.373862, + "stddev_ts": 0.001408, + "samples_ns": [ + 33306784324, + 33301813432, + 33301235632 + ], + "samples_ts": [ + 15.3722, + 15.3745, + 15.3748 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T01:39:09Z", + "avg_ns": 27079333322, + "stddev_ns": 13233351, + "avg_ts": 4.726853, + "stddev_ts": 0.002309, + "samples_ns": [ + 27094492166, + 27073404521, + 27070103281 + ], + "samples_ts": [ + 4.72421, + 4.72789, + 4.72846 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 942 + }, + { + "timestamp_utc": "2025-12-10T01:48:14.630922+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T01:40:33Z\",\n \"avg_ns\": 33298117708,\n \"stddev_ns\": 5715486,\n \"avg_ts\": 15.376245,\n \"stddev_ts\": 0.002638,\n \"samples_ns\": [ 33298486530, 33303636934, 33292229661 ],\n \"samples_ts\": [ 15.3761, 15.3737, 15.379 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T01:42:46Z\",\n \"avg_ns\": 109294971758,\n \"stddev_ns\": 11497823,\n \"avg_ts\": 4.684571,\n \"stddev_ts\": 0.000493,\n \"samples_ns\": [ 109296438216, 109282815843, 109305661216 ],\n \"samples_ts\": [ 4.68451, 4.68509, 4.68411 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T01:40:33Z", + "avg_ns": 33298117708, + "stddev_ns": 5715486, + "avg_ts": 15.376245, + "stddev_ts": 0.002638, + "samples_ns": [ + 33298486530, + 33303636934, + 33292229661 + ], + "samples_ts": [ + 15.3761, + 15.3737, + 15.379 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T01:42:46Z", + "avg_ns": 109294971758, + "stddev_ns": 11497823, + "avg_ts": 4.684571, + "stddev_ts": 0.000493, + "samples_ns": [ + 109296438216, + 109282815843, + 109305661216 + ], + "samples_ts": [ + 4.68451, + 4.68509, + 4.68411 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 943 + }, + { + "timestamp_utc": "2025-12-10T01:50:11.831524+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T01:48:16Z\",\n \"avg_ns\": 8252050068,\n \"stddev_ns\": 14033112,\n \"avg_ts\": 15.511327,\n \"stddev_ts\": 0.026352,\n \"samples_ns\": [ 8243540709, 8244362329, 8268247166 ],\n \"samples_ts\": [ 15.5273, 15.5258, 15.4809 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T01:48:49Z\",\n \"avg_ns\": 27236544576,\n \"stddev_ns\": 353445918,\n \"avg_ts\": 4.700092,\n \"stddev_ts\": 0.060555,\n \"samples_ns\": [ 27002343708, 27064186062, 27643103958 ],\n \"samples_ts\": [ 4.74033, 4.7295, 4.63045 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T01:48:16Z", + "avg_ns": 8252050068, + "stddev_ns": 14033112, + "avg_ts": 15.511327, + "stddev_ts": 0.026352, + "samples_ns": [ + 8243540709, + 8244362329, + 8268247166 + ], + "samples_ts": [ + 15.5273, + 15.5258, + 15.4809 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T01:48:49Z", + "avg_ns": 27236544576, + "stddev_ns": 353445918, + "avg_ts": 4.700092, + "stddev_ts": 0.060555, + "samples_ns": [ + 27002343708, + 27064186062, + 27643103958 + ], + "samples_ts": [ + 4.74033, + 4.7295, + 4.63045 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 944 + }, + { + "timestamp_utc": "2025-12-10T01:56:19.088473+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T01:50:14Z\",\n \"avg_ns\": 8264145449,\n \"stddev_ns\": 9803850,\n \"avg_ts\": 15.488609,\n \"stddev_ts\": 0.018381,\n \"samples_ns\": [ 8272767628, 8266187234, 8253481485 ],\n \"samples_ts\": [ 15.4725, 15.4848, 15.5086 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T01:50:47Z\",\n \"avg_ns\": 110545465940,\n \"stddev_ns\": 1035425923,\n \"avg_ts\": 4.631848,\n \"stddev_ts\": 0.043160,\n \"samples_ns\": [ 109852868725, 111735765997, 110047763100 ],\n \"samples_ts\": [ 4.66078, 4.58224, 4.65253 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T01:50:14Z", + "avg_ns": 8264145449, + "stddev_ns": 9803850, + "avg_ts": 15.488609, + "stddev_ts": 0.018381, + "samples_ns": [ + 8272767628, + 8266187234, + 8253481485 + ], + "samples_ts": [ + 15.4725, + 15.4848, + 15.5086 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T01:50:47Z", + "avg_ns": 110545465940, + "stddev_ns": 1035425923, + "avg_ts": 4.631848, + "stddev_ts": 0.04316, + "samples_ns": [ + 109852868725, + 111735765997, + 110047763100 + ], + "samples_ts": [ + 4.66078, + 4.58224, + 4.65253 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 945 + }, + { + "timestamp_utc": "2025-12-10T01:59:58.156835+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T01:56:21Z\",\n \"avg_ns\": 33814408636,\n \"stddev_ns\": 3203970,\n \"avg_ts\": 15.141474,\n \"stddev_ts\": 0.001430,\n \"samples_ns\": [ 33818067400, 33812181906, 33812976604 ],\n \"samples_ts\": [ 15.1398, 15.1425, 15.1421 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T01:58:36Z\",\n \"avg_ns\": 27118900458,\n \"stddev_ns\": 132756909,\n \"avg_ts\": 4.720031,\n \"stddev_ts\": 0.023052,\n \"samples_ns\": [ 27017819212, 27069634739, 27269247424 ],\n \"samples_ts\": [ 4.73761, 4.72855, 4.69393 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T01:56:21Z", + "avg_ns": 33814408636, + "stddev_ns": 3203970, + "avg_ts": 15.141474, + "stddev_ts": 0.00143, + "samples_ns": [ + 33818067400, + 33812181906, + 33812976604 + ], + "samples_ts": [ + 15.1398, + 15.1425, + 15.1421 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T01:58:36Z", + "avg_ns": 27118900458, + "stddev_ns": 132756909, + "avg_ts": 4.720031, + "stddev_ts": 0.023052, + "samples_ns": [ + 27017819212, + 27069634739, + 27269247424 + ], + "samples_ts": [ + 4.73761, + 4.72855, + 4.69393 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 946 + }, + { + "timestamp_utc": "2025-12-10T02:07:45.510825+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T02:00:00Z\",\n \"avg_ns\": 33807885633,\n \"stddev_ns\": 5107402,\n \"avg_ts\": 15.144396,\n \"stddev_ts\": 0.002288,\n \"samples_ns\": [ 33811504757, 33802043452, 33810108690 ],\n \"samples_ts\": [ 15.1428, 15.147, 15.1434 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T02:02:15Z\",\n \"avg_ns\": 109868351442,\n \"stddev_ns\": 278297226,\n \"avg_ts\": 4.660143,\n \"stddev_ts\": 0.011787,\n \"samples_ns\": [ 109688475739, 109727675364, 110188903224 ],\n \"samples_ts\": [ 4.66776, 4.6661, 4.64657 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T02:00:00Z", + "avg_ns": 33807885633, + "stddev_ns": 5107402, + "avg_ts": 15.144396, + "stddev_ts": 0.002288, + "samples_ns": [ + 33811504757, + 33802043452, + 33810108690 + ], + "samples_ts": [ + 15.1428, + 15.147, + 15.1434 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T02:02:15Z", + "avg_ns": 109868351442, + "stddev_ns": 278297226, + "avg_ts": 4.660143, + "stddev_ts": 0.011787, + "samples_ns": [ + 109688475739, + 109727675364, + 110188903224 + ], + "samples_ts": [ + 4.66776, + 4.6661, + 4.64657 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 947 + }, + { + "timestamp_utc": "2025-12-10T02:09:42.244826+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T02:07:47Z\",\n \"avg_ns\": 8262944038,\n \"stddev_ns\": 8833927,\n \"avg_ts\": 15.490859,\n \"stddev_ts\": 0.016551,\n \"samples_ns\": [ 8273142966, 8257687488, 8258001660 ],\n \"samples_ts\": [ 15.4718, 15.5007, 15.5001 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T02:08:20Z\",\n \"avg_ns\": 27066918663,\n \"stddev_ns\": 11105487,\n \"avg_ts\": 4.729021,\n \"stddev_ts\": 0.001939,\n \"samples_ns\": [ 27079604773, 27058971025, 27062180193 ],\n \"samples_ts\": [ 4.7268, 4.73041, 4.72985 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T02:07:47Z", + "avg_ns": 8262944038, + "stddev_ns": 8833927, + "avg_ts": 15.490859, + "stddev_ts": 0.016551, + "samples_ns": [ + 8273142966, + 8257687488, + 8258001660 + ], + "samples_ts": [ + 15.4718, + 15.5007, + 15.5001 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T02:08:20Z", + "avg_ns": 27066918663, + "stddev_ns": 11105487, + "avg_ts": 4.729021, + "stddev_ts": 0.001939, + "samples_ns": [ + 27079604773, + 27058971025, + 27062180193 + ], + "samples_ts": [ + 4.7268, + 4.73041, + 4.72985 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 948 + }, + { + "timestamp_utc": "2025-12-10T02:15:46.485055+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T02:09:44Z\",\n \"avg_ns\": 8250914040,\n \"stddev_ns\": 13210300,\n \"avg_ts\": 15.513459,\n \"stddev_ts\": 0.024857,\n \"samples_ns\": [ 8260950799, 8235947814, 8255843507 ],\n \"samples_ts\": [ 15.4946, 15.5416, 15.5042 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T02:10:17Z\",\n \"avg_ns\": 109577507013,\n \"stddev_ns\": 15258322,\n \"avg_ts\": 4.672492,\n \"stddev_ts\": 0.000650,\n \"samples_ns\": [ 109573053430, 109564978419, 109594489192 ],\n \"samples_ts\": [ 4.67268, 4.67303, 4.67177 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T02:09:44Z", + "avg_ns": 8250914040, + "stddev_ns": 13210300, + "avg_ts": 15.513459, + "stddev_ts": 0.024857, + "samples_ns": [ + 8260950799, + 8235947814, + 8255843507 + ], + "samples_ts": [ + 15.4946, + 15.5416, + 15.5042 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T02:10:17Z", + "avg_ns": 109577507013, + "stddev_ns": 15258322, + "avg_ts": 4.672492, + "stddev_ts": 0.00065, + "samples_ns": [ + 109573053430, + 109564978419, + 109594489192 + ], + "samples_ts": [ + 4.67268, + 4.67303, + 4.67177 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 949 + }, + { + "timestamp_utc": "2025-12-10T02:19:22.814922+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T02:15:48Z\",\n \"avg_ns\": 33137665831,\n \"stddev_ns\": 39195241,\n \"avg_ts\": 15.450710,\n \"stddev_ts\": 0.018280,\n \"samples_ns\": [ 33144076073, 33173260820, 33095660600 ],\n \"samples_ts\": [ 15.4477, 15.4341, 15.4703 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T02:18:01Z\",\n \"avg_ns\": 27067871704,\n \"stddev_ns\": 22790324,\n \"avg_ts\": 4.728856,\n \"stddev_ts\": 0.003981,\n \"samples_ns\": [ 27091398430, 27066316900, 27045899784 ],\n \"samples_ts\": [ 4.72475, 4.72913, 4.7327 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T02:15:48Z", + "avg_ns": 33137665831, + "stddev_ns": 39195241, + "avg_ts": 15.45071, + "stddev_ts": 0.01828, + "samples_ns": [ + 33144076073, + 33173260820, + 33095660600 + ], + "samples_ts": [ + 15.4477, + 15.4341, + 15.4703 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T02:18:01Z", + "avg_ns": 27067871704, + "stddev_ns": 22790324, + "avg_ts": 4.728856, + "stddev_ts": 0.003981, + "samples_ns": [ + 27091398430, + 27066316900, + 27045899784 + ], + "samples_ts": [ + 4.72475, + 4.72913, + 4.7327 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 950 + }, + { + "timestamp_utc": "2025-12-10T02:27:12.816452+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T02:19:24Z\",\n \"avg_ns\": 33187540110,\n \"stddev_ns\": 4457185,\n \"avg_ts\": 15.427477,\n \"stddev_ts\": 0.002069,\n \"samples_ns\": [ 33191810993, 33187878493, 33182930846 ],\n \"samples_ts\": [ 15.4255, 15.4273, 15.4296 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T02:21:37Z\",\n \"avg_ns\": 111561671738,\n \"stddev_ns\": 457813975,\n \"avg_ts\": 4.589441,\n \"stddev_ts\": 0.018868,\n \"samples_ns\": [ 111047395519, 111712821631, 111924798065 ],\n \"samples_ts\": [ 4.61064, 4.58318, 4.5745 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T02:19:24Z", + "avg_ns": 33187540110, + "stddev_ns": 4457185, + "avg_ts": 15.427477, + "stddev_ts": 0.002069, + "samples_ns": [ + 33191810993, + 33187878493, + 33182930846 + ], + "samples_ts": [ + 15.4255, + 15.4273, + 15.4296 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T02:21:37Z", + "avg_ns": 111561671738, + "stddev_ns": 457813975, + "avg_ts": 4.589441, + "stddev_ts": 0.018868, + "samples_ns": [ + 111047395519, + 111712821631, + 111924798065 + ], + "samples_ts": [ + 4.61064, + 4.58318, + 4.5745 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 951 + }, + { + "timestamp_utc": "2025-12-10T02:29:09.693844+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T02:27:14Z\",\n \"avg_ns\": 8258568855,\n \"stddev_ns\": 11222604,\n \"avg_ts\": 15.499073,\n \"stddev_ts\": 0.021045,\n \"samples_ns\": [ 8271519251, 8252496493, 8251690821 ],\n \"samples_ts\": [ 15.4748, 15.5105, 15.512 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T02:27:47Z\",\n \"avg_ns\": 27123223669,\n \"stddev_ns\": 23201743,\n \"avg_ts\": 4.719205,\n \"stddev_ts\": 0.004038,\n \"samples_ns\": [ 27096731952, 27133018684, 27139920373 ],\n \"samples_ts\": [ 4.72382, 4.7175, 4.7163 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T02:27:14Z", + "avg_ns": 8258568855, + "stddev_ns": 11222604, + "avg_ts": 15.499073, + "stddev_ts": 0.021045, + "samples_ns": [ + 8271519251, + 8252496493, + 8251690821 + ], + "samples_ts": [ + 15.4748, + 15.5105, + 15.512 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T02:27:47Z", + "avg_ns": 27123223669, + "stddev_ns": 23201743, + "avg_ts": 4.719205, + "stddev_ts": 0.004038, + "samples_ns": [ + 27096731952, + 27133018684, + 27139920373 + ], + "samples_ts": [ + 4.72382, + 4.7175, + 4.7163 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 952 + }, + { + "timestamp_utc": "2025-12-10T02:35:18.485155+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T02:29:11Z\",\n \"avg_ns\": 8250958731,\n \"stddev_ns\": 4077704,\n \"avg_ts\": 15.513351,\n \"stddev_ts\": 0.007663,\n \"samples_ns\": [ 8250972451, 8246876209, 8255027535 ],\n \"samples_ts\": [ 15.5133, 15.521, 15.5057 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T02:29:44Z\",\n \"avg_ns\": 111088485966,\n \"stddev_ns\": 3124091110,\n \"avg_ts\": 4.609072,\n \"stddev_ts\": 0.030448,\n \"samples_ns\": [ 110295850177, 111740478233, 111229129490 ],\n \"samples_ts\": [ 4.64206, 4.58205, 4.60311 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T02:29:11Z", + "avg_ns": 8250958731, + "stddev_ns": 4077704, + "avg_ts": 15.513351, + "stddev_ts": 0.007663, + "samples_ns": [ + 8250972451, + 8246876209, + 8255027535 + ], + "samples_ts": [ + 15.5133, + 15.521, + 15.5057 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T02:29:44Z", + "avg_ns": 111088485966, + "stddev_ns": 3124091110, + "avg_ts": 4.609072, + "stddev_ts": 0.030448, + "samples_ns": [ + 110295850177, + 111740478233, + 111229129490 + ], + "samples_ts": [ + 4.64206, + 4.58205, + 4.60311 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 953 + }, + { + "timestamp_utc": "2025-12-10T02:38:56.171377+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T02:35:20Z\",\n \"avg_ns\": 33325906575,\n \"stddev_ns\": 15747026,\n \"avg_ts\": 15.363425,\n \"stddev_ts\": 0.007261,\n \"samples_ns\": [ 33331505662, 33338088904, 33308125159 ],\n \"samples_ts\": [ 15.3608, 15.3578, 15.3716 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T02:37:33Z\",\n \"avg_ns\": 27297504580,\n \"stddev_ns\": 264831318,\n \"avg_ts\": 4.689369,\n \"stddev_ts\": 0.045740,\n \"samples_ns\": [ 26993237812, 27476130850, 27423145080 ],\n \"samples_ts\": [ 4.74193, 4.65859, 4.66759 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T02:35:20Z", + "avg_ns": 33325906575, + "stddev_ns": 15747026, + "avg_ts": 15.363425, + "stddev_ts": 0.007261, + "samples_ns": [ + 33331505662, + 33338088904, + 33308125159 + ], + "samples_ts": [ + 15.3608, + 15.3578, + 15.3716 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T02:37:33Z", + "avg_ns": 27297504580, + "stddev_ns": 264831318, + "avg_ts": 4.689369, + "stddev_ts": 0.04574, + "samples_ns": [ + 26993237812, + 27476130850, + 27423145080 + ], + "samples_ts": [ + 4.74193, + 4.65859, + 4.66759 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 954 + }, + { + "timestamp_utc": "2025-12-10T02:46:40.112479+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T02:38:58Z\",\n \"avg_ns\": 33324557638,\n \"stddev_ns\": 5697506,\n \"avg_ts\": 15.364045,\n \"stddev_ts\": 0.002627,\n \"samples_ns\": [ 33322122584, 33331068034, 33320482296 ],\n \"samples_ts\": [ 15.3652, 15.361, 15.3659 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T02:41:11Z\",\n \"avg_ns\": 109366809824,\n \"stddev_ns\": 16139050,\n \"avg_ts\": 4.681493,\n \"stddev_ts\": 0.000690,\n \"samples_ns\": [ 109385437692, 109357449597, 109357542185 ],\n \"samples_ts\": [ 4.6807, 4.68189, 4.68189 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T02:38:58Z", + "avg_ns": 33324557638, + "stddev_ns": 5697506, + "avg_ts": 15.364045, + "stddev_ts": 0.002627, + "samples_ns": [ + 33322122584, + 33331068034, + 33320482296 + ], + "samples_ts": [ + 15.3652, + 15.361, + 15.3659 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T02:41:11Z", + "avg_ns": 109366809824, + "stddev_ns": 16139050, + "avg_ts": 4.681493, + "stddev_ts": 0.00069, + "samples_ns": [ + 109385437692, + 109357449597, + 109357542185 + ], + "samples_ts": [ + 4.6807, + 4.68189, + 4.68189 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 955 + }, + { + "timestamp_utc": "2025-12-10T02:48:37.040932+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T02:46:42Z\",\n \"avg_ns\": 8253887452,\n \"stddev_ns\": 15038018,\n \"avg_ts\": 15.507878,\n \"stddev_ts\": 0.028224,\n \"samples_ns\": [ 8245206184, 8245204630, 8271251543 ],\n \"samples_ts\": [ 15.5242, 15.5242, 15.4753 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T02:47:15Z\",\n \"avg_ns\": 27152971143,\n \"stddev_ns\": 2571690,\n \"avg_ts\": 4.714033,\n \"stddev_ts\": 0.000446,\n \"samples_ns\": [ 27150314698, 27155436869, 27153161863 ],\n \"samples_ts\": [ 4.71449, 4.7136, 4.714 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T02:46:42Z", + "avg_ns": 8253887452, + "stddev_ns": 15038018, + "avg_ts": 15.507878, + "stddev_ts": 0.028224, + "samples_ns": [ + 8245206184, + 8245204630, + 8271251543 + ], + "samples_ts": [ + 15.5242, + 15.5242, + 15.4753 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T02:47:15Z", + "avg_ns": 27152971143, + "stddev_ns": 2571690, + "avg_ts": 4.714033, + "stddev_ts": 0.000446, + "samples_ns": [ + 27150314698, + 27155436869, + 27153161863 + ], + "samples_ts": [ + 4.71449, + 4.7136, + 4.714 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 956 + }, + { + "timestamp_utc": "2025-12-10T02:54:45.201835+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T02:48:39Z\",\n \"avg_ns\": 8254288207,\n \"stddev_ns\": 9150639,\n \"avg_ts\": 15.507104,\n \"stddev_ts\": 0.017195,\n \"samples_ns\": [ 8262728202, 8255573577, 8244562842 ],\n \"samples_ts\": [ 15.4913, 15.5047, 15.5254 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T02:49:12Z\",\n \"avg_ns\": 110885390573,\n \"stddev_ns\": 922714982,\n \"avg_ts\": 4.617594,\n \"stddev_ts\": 0.038494,\n \"samples_ns\": [ 109902815283, 111019870055, 111733486383 ],\n \"samples_ts\": [ 4.65866, 4.61179, 4.58233 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T02:48:39Z", + "avg_ns": 8254288207, + "stddev_ns": 9150639, + "avg_ts": 15.507104, + "stddev_ts": 0.017195, + "samples_ns": [ + 8262728202, + 8255573577, + 8244562842 + ], + "samples_ts": [ + 15.4913, + 15.5047, + 15.5254 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T02:49:12Z", + "avg_ns": 110885390573, + "stddev_ns": 922714982, + "avg_ts": 4.617594, + "stddev_ts": 0.038494, + "samples_ns": [ + 109902815283, + 111019870055, + 111733486383 + ], + "samples_ts": [ + 4.65866, + 4.61179, + 4.58233 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 957 + }, + { + "timestamp_utc": "2025-12-10T02:58:25.734077+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T02:54:47Z\",\n \"avg_ns\": 33817783675,\n \"stddev_ns\": 6906955,\n \"avg_ts\": 15.139964,\n \"stddev_ts\": 0.003090,\n \"samples_ns\": [ 33810035858, 33823275266, 33820039903 ],\n \"samples_ts\": [ 15.1434, 15.1375, 15.139 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T02:57:02Z\",\n \"avg_ns\": 27573648448,\n \"stddev_ns\": 548778644,\n \"avg_ts\": 4.643340,\n \"stddev_ts\": 0.092454,\n \"samples_ns\": [ 27022654975, 27578105356, 28120185015 ],\n \"samples_ts\": [ 4.73677, 4.64136, 4.55189 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T02:54:47Z", + "avg_ns": 33817783675, + "stddev_ns": 6906955, + "avg_ts": 15.139964, + "stddev_ts": 0.00309, + "samples_ns": [ + 33810035858, + 33823275266, + 33820039903 + ], + "samples_ts": [ + 15.1434, + 15.1375, + 15.139 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T02:57:02Z", + "avg_ns": 27573648448, + "stddev_ns": 548778644, + "avg_ts": 4.64334, + "stddev_ts": 0.092454, + "samples_ns": [ + 27022654975, + 27578105356, + 28120185015 + ], + "samples_ts": [ + 4.73677, + 4.64136, + 4.55189 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 958 + }, + { + "timestamp_utc": "2025-12-10T03:06:14.051802+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T02:58:27Z\",\n \"avg_ns\": 33798805157,\n \"stddev_ns\": 9398271,\n \"avg_ts\": 15.148465,\n \"stddev_ts\": 0.004211,\n \"samples_ns\": [ 33809448695, 33791658551, 33795308226 ],\n \"samples_ts\": [ 15.1437, 15.1517, 15.15 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T03:00:43Z\",\n \"avg_ns\": 110184761522,\n \"stddev_ns\": 353783885,\n \"avg_ts\": 4.646772,\n \"stddev_ts\": 0.014908,\n \"samples_ns\": [ 110565004699, 110123962717, 109865317150 ],\n \"samples_ts\": [ 4.63076, 4.64931, 4.66025 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T02:58:27Z", + "avg_ns": 33798805157, + "stddev_ns": 9398271, + "avg_ts": 15.148465, + "stddev_ts": 0.004211, + "samples_ns": [ + 33809448695, + 33791658551, + 33795308226 + ], + "samples_ts": [ + 15.1437, + 15.1517, + 15.15 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T03:00:43Z", + "avg_ns": 110184761522, + "stddev_ns": 353783885, + "avg_ts": 4.646772, + "stddev_ts": 0.014908, + "samples_ns": [ + 110565004699, + 110123962717, + 109865317150 + ], + "samples_ts": [ + 4.63076, + 4.64931, + 4.66025 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 959 + }, + { + "timestamp_utc": "2025-12-10T03:08:10.770079+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T03:06:16Z\",\n \"avg_ns\": 8246904466,\n \"stddev_ns\": 2610606,\n \"avg_ts\": 15.520976,\n \"stddev_ts\": 0.004911,\n \"samples_ns\": [ 8243937647, 8248841204, 8247934548 ],\n \"samples_ts\": [ 15.5266, 15.5173, 15.519 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T03:06:49Z\",\n \"avg_ns\": 27074202960,\n \"stddev_ns\": 9650175,\n \"avg_ts\": 4.727748,\n \"stddev_ts\": 0.001685,\n \"samples_ns\": [ 27082048244, 27077133385, 27063427251 ],\n \"samples_ts\": [ 4.72638, 4.72724, 4.72963 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T03:06:16Z", + "avg_ns": 8246904466, + "stddev_ns": 2610606, + "avg_ts": 15.520976, + "stddev_ts": 0.004911, + "samples_ns": [ + 8243937647, + 8248841204, + 8247934548 + ], + "samples_ts": [ + 15.5266, + 15.5173, + 15.519 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T03:06:49Z", + "avg_ns": 27074202960, + "stddev_ns": 9650175, + "avg_ts": 4.727748, + "stddev_ts": 0.001685, + "samples_ns": [ + 27082048244, + 27077133385, + 27063427251 + ], + "samples_ts": [ + 4.72638, + 4.72724, + 4.72963 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 960 + }, + { + "timestamp_utc": "2025-12-10T03:14:15.152619+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T03:08:12Z\",\n \"avg_ns\": 8259733721,\n \"stddev_ns\": 11805253,\n \"avg_ts\": 15.496889,\n \"stddev_ts\": 0.022149,\n \"samples_ns\": [ 8271514958, 8247905292, 8259780914 ],\n \"samples_ts\": [ 15.4748, 15.5191, 15.4968 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T03:08:45Z\",\n \"avg_ns\": 109623126556,\n \"stddev_ns\": 13864868,\n \"avg_ts\": 4.670547,\n \"stddev_ts\": 0.000590,\n \"samples_ns\": [ 109633115490, 109607307062, 109628957118 ],\n \"samples_ts\": [ 4.67012, 4.67122, 4.6703 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T03:08:12Z", + "avg_ns": 8259733721, + "stddev_ns": 11805253, + "avg_ts": 15.496889, + "stddev_ts": 0.022149, + "samples_ns": [ + 8271514958, + 8247905292, + 8259780914 + ], + "samples_ts": [ + 15.4748, + 15.5191, + 15.4968 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T03:08:45Z", + "avg_ns": 109623126556, + "stddev_ns": 13864868, + "avg_ts": 4.670547, + "stddev_ts": 0.00059, + "samples_ns": [ + 109633115490, + 109607307062, + 109628957118 + ], + "samples_ts": [ + 4.67012, + 4.67122, + 4.6703 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 961 + }, + { + "timestamp_utc": "2025-12-10T03:17:51.692917+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T03:14:17Z\",\n \"avg_ns\": 33152679145,\n \"stddev_ns\": 1890094,\n \"avg_ts\": 15.443699,\n \"stddev_ts\": 0.000872,\n \"samples_ns\": [ 33152905060, 33150703966, 33154428411 ],\n \"samples_ts\": [ 15.4436, 15.4446, 15.4429 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T03:16:29Z\",\n \"avg_ns\": 27131039250,\n \"stddev_ns\": 20249406,\n \"avg_ts\": 4.717845,\n \"stddev_ts\": 0.003522,\n \"samples_ns\": [ 27109133662, 27149073525, 27134910563 ],\n \"samples_ts\": [ 4.72166, 4.71471, 4.71717 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T03:14:17Z", + "avg_ns": 33152679145, + "stddev_ns": 1890094, + "avg_ts": 15.443699, + "stddev_ts": 0.000872, + "samples_ns": [ + 33152905060, + 33150703966, + 33154428411 + ], + "samples_ts": [ + 15.4436, + 15.4446, + 15.4429 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T03:16:29Z", + "avg_ns": 27131039250, + "stddev_ns": 20249406, + "avg_ts": 4.717845, + "stddev_ts": 0.003522, + "samples_ns": [ + 27109133662, + 27149073525, + 27134910563 + ], + "samples_ts": [ + 4.72166, + 4.71471, + 4.71717 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 962 + }, + { + "timestamp_utc": "2025-12-10T03:25:36.087557+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T03:17:53Z\",\n \"avg_ns\": 33165607398,\n \"stddev_ns\": 8075253,\n \"avg_ts\": 15.437680,\n \"stddev_ts\": 0.003759,\n \"samples_ns\": [ 33165897206, 33173533846, 33157391142 ],\n \"samples_ts\": [ 15.4375, 15.434, 15.4415 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T03:20:06Z\",\n \"avg_ns\": 109732782900,\n \"stddev_ns\": 180384677,\n \"avg_ts\": 4.665888,\n \"stddev_ts\": 0.007663,\n \"samples_ns\": [ 109653747113, 109605407354, 109939194235 ],\n \"samples_ts\": [ 4.66924, 4.6713, 4.65712 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T03:17:53Z", + "avg_ns": 33165607398, + "stddev_ns": 8075253, + "avg_ts": 15.43768, + "stddev_ts": 0.003759, + "samples_ns": [ + 33165897206, + 33173533846, + 33157391142 + ], + "samples_ts": [ + 15.4375, + 15.434, + 15.4415 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T03:20:06Z", + "avg_ns": 109732782900, + "stddev_ns": 180384677, + "avg_ts": 4.665888, + "stddev_ts": 0.007663, + "samples_ns": [ + 109653747113, + 109605407354, + 109939194235 + ], + "samples_ts": [ + 4.66924, + 4.6713, + 4.65712 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 963 + }, + { + "timestamp_utc": "2025-12-10T03:27:32.897844+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T03:25:38Z\",\n \"avg_ns\": 8246763011,\n \"stddev_ns\": 8663848,\n \"avg_ts\": 15.521253,\n \"stddev_ts\": 0.016304,\n \"samples_ns\": [ 8238432513, 8255725590, 8246130930 ],\n \"samples_ts\": [ 15.5369, 15.5044, 15.5224 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T03:26:11Z\",\n \"avg_ns\": 27116340734,\n \"stddev_ns\": 10038590,\n \"avg_ts\": 4.720401,\n \"stddev_ts\": 0.001747,\n \"samples_ns\": [ 27127777664, 27112239844, 27109004696 ],\n \"samples_ts\": [ 4.71841, 4.72111, 4.72168 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T03:25:38Z", + "avg_ns": 8246763011, + "stddev_ns": 8663848, + "avg_ts": 15.521253, + "stddev_ts": 0.016304, + "samples_ns": [ + 8238432513, + 8255725590, + 8246130930 + ], + "samples_ts": [ + 15.5369, + 15.5044, + 15.5224 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T03:26:11Z", + "avg_ns": 27116340734, + "stddev_ns": 10038590, + "avg_ts": 4.720401, + "stddev_ts": 0.001747, + "samples_ns": [ + 27127777664, + 27112239844, + 27109004696 + ], + "samples_ts": [ + 4.71841, + 4.72111, + 4.72168 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 964 + }, + { + "timestamp_utc": "2025-12-10T03:33:36.876239+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T03:27:35Z\",\n \"avg_ns\": 8251336242,\n \"stddev_ns\": 9262998,\n \"avg_ts\": 15.512652,\n \"stddev_ts\": 0.017403,\n \"samples_ns\": [ 8261931274, 8244772289, 8247305164 ],\n \"samples_ts\": [ 15.4927, 15.525, 15.5202 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T03:28:08Z\",\n \"avg_ns\": 109486257436,\n \"stddev_ns\": 355895634,\n \"avg_ts\": 4.676419,\n \"stddev_ts\": 0.015173,\n \"samples_ns\": [ 109280711393, 109280850606, 109897210309 ],\n \"samples_ts\": [ 4.68518, 4.68518, 4.6589 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T03:27:35Z", + "avg_ns": 8251336242, + "stddev_ns": 9262998, + "avg_ts": 15.512652, + "stddev_ts": 0.017403, + "samples_ns": [ + 8261931274, + 8244772289, + 8247305164 + ], + "samples_ts": [ + 15.4927, + 15.525, + 15.5202 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T03:28:08Z", + "avg_ns": 109486257436, + "stddev_ns": 355895634, + "avg_ts": 4.676419, + "stddev_ts": 0.015173, + "samples_ns": [ + 109280711393, + 109280850606, + 109897210309 + ], + "samples_ts": [ + 4.68518, + 4.68518, + 4.6589 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 965 + }, + { + "timestamp_utc": "2025-12-10T03:37:13.706376+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T03:33:39Z\",\n \"avg_ns\": 33322303268,\n \"stddev_ns\": 13052999,\n \"avg_ts\": 15.365086,\n \"stddev_ts\": 0.006017,\n \"samples_ns\": [ 33336334484, 33320047692, 33310527630 ],\n \"samples_ts\": [ 15.3586, 15.3661, 15.3705 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T03:35:52Z\",\n \"avg_ns\": 27018966572,\n \"stddev_ns\": 21006339,\n \"avg_ts\": 4.737415,\n \"stddev_ts\": 0.003682,\n \"samples_ns\": [ 27043206307, 27007616584, 27006076825 ],\n \"samples_ts\": [ 4.73317, 4.7394, 4.73967 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T03:33:39Z", + "avg_ns": 33322303268, + "stddev_ns": 13052999, + "avg_ts": 15.365086, + "stddev_ts": 0.006017, + "samples_ns": [ + 33336334484, + 33320047692, + 33310527630 + ], + "samples_ts": [ + 15.3586, + 15.3661, + 15.3705 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T03:35:52Z", + "avg_ns": 27018966572, + "stddev_ns": 21006339, + "avg_ts": 4.737415, + "stddev_ts": 0.003682, + "samples_ns": [ + 27043206307, + 27007616584, + 27006076825 + ], + "samples_ts": [ + 4.73317, + 4.7394, + 4.73967 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 966 + }, + { + "timestamp_utc": "2025-12-10T03:44:57.686790+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T03:37:15Z\",\n \"avg_ns\": 33303312454,\n \"stddev_ns\": 10740848,\n \"avg_ts\": 15.373847,\n \"stddev_ts\": 0.004959,\n \"samples_ns\": [ 33313088328, 33291814673, 33305034361 ],\n \"samples_ts\": [ 15.3693, 15.3792, 15.3731 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T03:39:29Z\",\n \"avg_ns\": 109425826375,\n \"stddev_ns\": 36831105,\n \"avg_ts\": 4.678969,\n \"stddev_ts\": 0.001575,\n \"samples_ns\": [ 109465158921, 109420164890, 109392155315 ],\n \"samples_ts\": [ 4.67729, 4.67921, 4.68041 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T03:37:15Z", + "avg_ns": 33303312454, + "stddev_ns": 10740848, + "avg_ts": 15.373847, + "stddev_ts": 0.004959, + "samples_ns": [ + 33313088328, + 33291814673, + 33305034361 + ], + "samples_ts": [ + 15.3693, + 15.3792, + 15.3731 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T03:39:29Z", + "avg_ns": 109425826375, + "stddev_ns": 36831105, + "avg_ts": 4.678969, + "stddev_ts": 0.001575, + "samples_ns": [ + 109465158921, + 109420164890, + 109392155315 + ], + "samples_ts": [ + 4.67729, + 4.67921, + 4.68041 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 967 + }, + { + "timestamp_utc": "2025-12-10T03:46:54.386770+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T03:44:59Z\",\n \"avg_ns\": 8251708844,\n \"stddev_ns\": 2790907,\n \"avg_ts\": 15.511940,\n \"stddev_ts\": 0.005240,\n \"samples_ns\": [ 8254898880, 8250488549, 8249739105 ],\n \"samples_ts\": [ 15.5059, 15.5142, 15.5156 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T03:45:32Z\",\n \"avg_ns\": 27072106627,\n \"stddev_ns\": 10293841,\n \"avg_ts\": 4.728114,\n \"stddev_ts\": 0.001798,\n \"samples_ns\": [ 27082672988, 27062108876, 27071538017 ],\n \"samples_ts\": [ 4.72627, 4.72986, 4.72821 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T03:44:59Z", + "avg_ns": 8251708844, + "stddev_ns": 2790907, + "avg_ts": 15.51194, + "stddev_ts": 0.00524, + "samples_ns": [ + 8254898880, + 8250488549, + 8249739105 + ], + "samples_ts": [ + 15.5059, + 15.5142, + 15.5156 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T03:45:32Z", + "avg_ns": 27072106627, + "stddev_ns": 10293841, + "avg_ts": 4.728114, + "stddev_ts": 0.001798, + "samples_ns": [ + 27082672988, + 27062108876, + 27071538017 + ], + "samples_ts": [ + 4.72627, + 4.72986, + 4.72821 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 968 + }, + { + "timestamp_utc": "2025-12-10T03:52:59.131819+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T03:46:56Z\",\n \"avg_ns\": 8257481560,\n \"stddev_ns\": 20339401,\n \"avg_ts\": 15.501157,\n \"stddev_ts\": 0.038131,\n \"samples_ns\": [ 8243350739, 8280792676, 8248301266 ],\n \"samples_ts\": [ 15.5277, 15.4575, 15.5183 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T03:47:29Z\",\n \"avg_ns\": 109747240667,\n \"stddev_ns\": 794939692,\n \"avg_ts\": 4.665428,\n \"stddev_ts\": 0.033672,\n \"samples_ns\": [ 109155485055, 109435419645, 110650817303 ],\n \"samples_ts\": [ 4.69056, 4.67856, 4.62717 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T03:46:56Z", + "avg_ns": 8257481560, + "stddev_ns": 20339401, + "avg_ts": 15.501157, + "stddev_ts": 0.038131, + "samples_ns": [ + 8243350739, + 8280792676, + 8248301266 + ], + "samples_ts": [ + 15.5277, + 15.4575, + 15.5183 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T03:47:29Z", + "avg_ns": 109747240667, + "stddev_ns": 794939692, + "avg_ts": 4.665428, + "stddev_ts": 0.033672, + "samples_ns": [ + 109155485055, + 109435419645, + 110650817303 + ], + "samples_ts": [ + 4.69056, + 4.67856, + 4.62717 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 969 + }, + { + "timestamp_utc": "2025-12-10T03:56:39.820817+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T03:53:01Z\",\n \"avg_ns\": 33803311970,\n \"stddev_ns\": 5650980,\n \"avg_ts\": 15.146445,\n \"stddev_ts\": 0.002532,\n \"samples_ns\": [ 33809680620, 33801358011, 33798897279 ],\n \"samples_ts\": [ 15.1436, 15.1473, 15.1484 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T03:55:16Z\",\n \"avg_ns\": 27659878454,\n \"stddev_ns\": 424618376,\n \"avg_ts\": 4.628375,\n \"stddev_ts\": 0.071686,\n \"samples_ns\": [ 27169719047, 27894540543, 27915375774 ],\n \"samples_ts\": [ 4.71113, 4.58871, 4.58529 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T03:53:01Z", + "avg_ns": 33803311970, + "stddev_ns": 5650980, + "avg_ts": 15.146445, + "stddev_ts": 0.002532, + "samples_ns": [ + 33809680620, + 33801358011, + 33798897279 + ], + "samples_ts": [ + 15.1436, + 15.1473, + 15.1484 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T03:55:16Z", + "avg_ns": 27659878454, + "stddev_ns": 424618376, + "avg_ts": 4.628375, + "stddev_ts": 0.071686, + "samples_ns": [ + 27169719047, + 27894540543, + 27915375774 + ], + "samples_ts": [ + 4.71113, + 4.58871, + 4.58529 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 970 + }, + { + "timestamp_utc": "2025-12-10T04:04:31.415916+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T03:56:41Z\",\n \"avg_ns\": 33807236291,\n \"stddev_ns\": 2592138,\n \"avg_ts\": 15.144687,\n \"stddev_ts\": 0.001161,\n \"samples_ns\": [ 33804585340, 33807358220, 33809765313 ],\n \"samples_ts\": [ 15.1459, 15.1446, 15.1436 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T03:58:57Z\",\n \"avg_ns\": 111284663287,\n \"stddev_ns\": 1103238067,\n \"avg_ts\": 4.601114,\n \"stddev_ts\": 0.045428,\n \"samples_ns\": [ 110389677293, 112517257734, 110947054834 ],\n \"samples_ts\": [ 4.63811, 4.55041, 4.61481 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T03:56:41Z", + "avg_ns": 33807236291, + "stddev_ns": 2592138, + "avg_ts": 15.144687, + "stddev_ts": 0.001161, + "samples_ns": [ + 33804585340, + 33807358220, + 33809765313 + ], + "samples_ts": [ + 15.1459, + 15.1446, + 15.1436 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T03:58:57Z", + "avg_ns": 111284663287, + "stddev_ns": 1103238067, + "avg_ts": 4.601114, + "stddev_ts": 0.045428, + "samples_ns": [ + 110389677293, + 112517257734, + 110947054834 + ], + "samples_ts": [ + 4.63811, + 4.55041, + 4.61481 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 971 + }, + { + "timestamp_utc": "2025-12-10T04:06:28.464178+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T04:04:33Z\",\n \"avg_ns\": 6469432855,\n \"stddev_ns\": 19761769,\n \"avg_ts\": 19.785474,\n \"stddev_ts\": 0.060409,\n \"samples_ns\": [ 6490117354, 6450746252, 6467434961 ],\n \"samples_ts\": [ 19.7223, 19.8427, 19.7915 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T04:04:59Z\",\n \"avg_ns\": 29569858560,\n \"stddev_ns\": 100339267,\n \"avg_ts\": 4.328765,\n \"stddev_ts\": 0.014700,\n \"samples_ns\": [ 29462625987, 29585477209, 29661472485 ],\n \"samples_ts\": [ 4.34449, 4.32645, 4.31536 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T04:04:33Z", + "avg_ns": 6469432855, + "stddev_ns": 19761769, + "avg_ts": 19.785474, + "stddev_ts": 0.060409, + "samples_ns": [ + 6490117354, + 6450746252, + 6467434961 + ], + "samples_ts": [ + 19.7223, + 19.8427, + 19.7915 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T04:04:59Z", + "avg_ns": 29569858560, + "stddev_ns": 100339267, + "avg_ts": 4.328765, + "stddev_ts": 0.0147, + "samples_ns": [ + 29462625987, + 29585477209, + 29661472485 + ], + "samples_ts": [ + 4.34449, + 4.32645, + 4.31536 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 972 + }, + { + "timestamp_utc": "2025-12-10T04:12:55.210792+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T04:06:30Z\",\n \"avg_ns\": 6470667386,\n \"stddev_ns\": 22823280,\n \"avg_ts\": 19.781740,\n \"stddev_ts\": 0.069633,\n \"samples_ns\": [ 6456546096, 6458457743, 6496998319 ],\n \"samples_ts\": [ 19.8248, 19.819, 19.7014 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T04:06:56Z\",\n \"avg_ns\": 119451362066,\n \"stddev_ns\": 211694485,\n \"avg_ts\": 4.286272,\n \"stddev_ts\": 0.007604,\n \"samples_ns\": [ 119563977244, 119207164397, 119582944559 ],\n \"samples_ts\": [ 4.28223, 4.29504, 4.28155 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T04:06:30Z", + "avg_ns": 6470667386, + "stddev_ns": 22823280, + "avg_ts": 19.78174, + "stddev_ts": 0.069633, + "samples_ns": [ + 6456546096, + 6458457743, + 6496998319 + ], + "samples_ts": [ + 19.8248, + 19.819, + 19.7014 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T04:06:56Z", + "avg_ns": 119451362066, + "stddev_ns": 211694485, + "avg_ts": 4.286272, + "stddev_ts": 0.007604, + "samples_ns": [ + 119563977244, + 119207164397, + 119582944559 + ], + "samples_ts": [ + 4.28223, + 4.29504, + 4.28155 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 973 + }, + { + "timestamp_utc": "2025-12-10T04:16:10.233788+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T04:12:57Z\",\n \"avg_ns\": 25929759129,\n \"stddev_ns\": 39647502,\n \"avg_ts\": 19.745683,\n \"stddev_ts\": 0.030202,\n \"samples_ns\": [ 25966195843, 25935544914, 25887536630 ],\n \"samples_ts\": [ 19.7179, 19.7412, 19.7779 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T04:14:41Z\",\n \"avg_ns\": 29606901453,\n \"stddev_ns\": 29295053,\n \"avg_ts\": 4.323319,\n \"stddev_ts\": 0.004277,\n \"samples_ns\": [ 29605160040, 29637027358, 29578516963 ],\n \"samples_ts\": [ 4.32357, 4.31892, 4.32747 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T04:12:57Z", + "avg_ns": 25929759129, + "stddev_ns": 39647502, + "avg_ts": 19.745683, + "stddev_ts": 0.030202, + "samples_ns": [ + 25966195843, + 25935544914, + 25887536630 + ], + "samples_ts": [ + 19.7179, + 19.7412, + 19.7779 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T04:14:41Z", + "avg_ns": 29606901453, + "stddev_ns": 29295053, + "avg_ts": 4.323319, + "stddev_ts": 0.004277, + "samples_ns": [ + 29605160040, + 29637027358, + 29578516963 + ], + "samples_ts": [ + 4.32357, + 4.31892, + 4.32747 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 974 + }, + { + "timestamp_utc": "2025-12-10T04:23:55.675290+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T04:16:12Z\",\n \"avg_ns\": 25932183472,\n \"stddev_ns\": 3331027,\n \"avg_ts\": 19.743806,\n \"stddev_ts\": 0.002530,\n \"samples_ns\": [ 25936020555, 25930226433, 25930303430 ],\n \"samples_ts\": [ 19.7409, 19.7453, 19.7452 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T04:17:56Z\",\n \"avg_ns\": 119719980415,\n \"stddev_ns\": 230534733,\n \"avg_ts\": 4.276657,\n \"stddev_ts\": 0.008226,\n \"samples_ns\": [ 119589728463, 119584054544, 119986158240 ],\n \"samples_ts\": [ 4.2813, 4.28151, 4.26716 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T04:16:12Z", + "avg_ns": 25932183472, + "stddev_ns": 3331027, + "avg_ts": 19.743806, + "stddev_ts": 0.00253, + "samples_ns": [ + 25936020555, + 25930226433, + 25930303430 + ], + "samples_ts": [ + 19.7409, + 19.7453, + 19.7452 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T04:17:56Z", + "avg_ns": 119719980415, + "stddev_ns": 230534733, + "avg_ts": 4.276657, + "stddev_ts": 0.008226, + "samples_ns": [ + 119589728463, + 119584054544, + 119986158240 + ], + "samples_ts": [ + 4.2813, + 4.28151, + 4.26716 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 975 + }, + { + "timestamp_utc": "2025-12-10T04:25:52.803580+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T04:23:57Z\",\n \"avg_ns\": 6452937847,\n \"stddev_ns\": 2437295,\n \"avg_ts\": 19.835928,\n \"stddev_ts\": 0.007483,\n \"samples_ns\": [ 6452034431, 6455695067, 6451084045 ],\n \"samples_ts\": [ 19.8387, 19.8275, 19.8416 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T04:24:23Z\",\n \"avg_ns\": 29595549186,\n \"stddev_ns\": 83637761,\n \"avg_ts\": 4.324998,\n \"stddev_ts\": 0.012232,\n \"samples_ns\": [ 29670404110, 29610968402, 29505275048 ],\n \"samples_ts\": [ 4.31406, 4.32272, 4.33821 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T04:23:57Z", + "avg_ns": 6452937847, + "stddev_ns": 2437295, + "avg_ts": 19.835928, + "stddev_ts": 0.007483, + "samples_ns": [ + 6452034431, + 6455695067, + 6451084045 + ], + "samples_ts": [ + 19.8387, + 19.8275, + 19.8416 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T04:24:23Z", + "avg_ns": 29595549186, + "stddev_ns": 83637761, + "avg_ts": 4.324998, + "stddev_ts": 0.012232, + "samples_ns": [ + 29670404110, + 29610968402, + 29505275048 + ], + "samples_ts": [ + 4.31406, + 4.32272, + 4.33821 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 976 + }, + { + "timestamp_utc": "2025-12-10T04:32:20.082875+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T04:25:54Z\",\n \"avg_ns\": 6465186223,\n \"stddev_ns\": 18362825,\n \"avg_ts\": 19.798453,\n \"stddev_ts\": 0.056223,\n \"samples_ns\": [ 6483881621, 6464501204, 6447175846 ],\n \"samples_ts\": [ 19.7413, 19.8004, 19.8537 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T04:26:20Z\",\n \"avg_ns\": 119645130163,\n \"stddev_ns\": 156079853,\n \"avg_ts\": 4.279327,\n \"stddev_ts\": 0.005578,\n \"samples_ns\": [ 119825121968, 119563079912, 119547188609 ],\n \"samples_ts\": [ 4.27289, 4.28226, 4.28283 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T04:25:54Z", + "avg_ns": 6465186223, + "stddev_ns": 18362825, + "avg_ts": 19.798453, + "stddev_ts": 0.056223, + "samples_ns": [ + 6483881621, + 6464501204, + 6447175846 + ], + "samples_ts": [ + 19.7413, + 19.8004, + 19.8537 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T04:26:20Z", + "avg_ns": 119645130163, + "stddev_ns": 156079853, + "avg_ts": 4.279327, + "stddev_ts": 0.005578, + "samples_ns": [ + 119825121968, + 119563079912, + 119547188609 + ], + "samples_ts": [ + 4.27289, + 4.28226, + 4.28283 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 977 + }, + { + "timestamp_utc": "2025-12-10T04:35:35.606045+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T04:32:22Z\",\n \"avg_ns\": 26087362106,\n \"stddev_ns\": 13930638,\n \"avg_ts\": 19.626365,\n \"stddev_ts\": 0.010477,\n \"samples_ns\": [ 26076929791, 26081975826, 26103180702 ],\n \"samples_ts\": [ 19.6342, 19.6304, 19.6145 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T04:34:06Z\",\n \"avg_ns\": 29551221569,\n \"stddev_ns\": 37152501,\n \"avg_ts\": 4.331467,\n \"stddev_ts\": 0.005449,\n \"samples_ns\": [ 29509135496, 29579463292, 29565065921 ],\n \"samples_ts\": [ 4.33764, 4.32733, 4.32943 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T04:32:22Z", + "avg_ns": 26087362106, + "stddev_ns": 13930638, + "avg_ts": 19.626365, + "stddev_ts": 0.010477, + "samples_ns": [ + 26076929791, + 26081975826, + 26103180702 + ], + "samples_ts": [ + 19.6342, + 19.6304, + 19.6145 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T04:34:06Z", + "avg_ns": 29551221569, + "stddev_ns": 37152501, + "avg_ts": 4.331467, + "stddev_ts": 0.005449, + "samples_ns": [ + 29509135496, + 29579463292, + 29565065921 + ], + "samples_ts": [ + 4.33764, + 4.32733, + 4.32943 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 978 + }, + { + "timestamp_utc": "2025-12-10T04:43:21.366864+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T04:35:37Z\",\n \"avg_ns\": 26095257920,\n \"stddev_ns\": 19337584,\n \"avg_ts\": 19.620430,\n \"stddev_ts\": 0.014539,\n \"samples_ns\": [ 26114780639, 26076112336, 26094880786 ],\n \"samples_ts\": [ 19.6058, 19.6348, 19.6207 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T04:37:22Z\",\n \"avg_ns\": 119611135438,\n \"stddev_ns\": 102123457,\n \"avg_ts\": 4.280540,\n \"stddev_ts\": 0.003653,\n \"samples_ns\": [ 119725779893, 119577720692, 119529905730 ],\n \"samples_ts\": [ 4.27644, 4.28173, 4.28345 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T04:35:37Z", + "avg_ns": 26095257920, + "stddev_ns": 19337584, + "avg_ts": 19.62043, + "stddev_ts": 0.014539, + "samples_ns": [ + 26114780639, + 26076112336, + 26094880786 + ], + "samples_ts": [ + 19.6058, + 19.6348, + 19.6207 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T04:37:22Z", + "avg_ns": 119611135438, + "stddev_ns": 102123457, + "avg_ts": 4.28054, + "stddev_ts": 0.003653, + "samples_ns": [ + 119725779893, + 119577720692, + 119529905730 + ], + "samples_ts": [ + 4.27644, + 4.28173, + 4.28345 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 979 + }, + { + "timestamp_utc": "2025-12-10T04:45:18.303200+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T04:43:23Z\",\n \"avg_ns\": 6456026354,\n \"stddev_ns\": 11385010,\n \"avg_ts\": 19.826478,\n \"stddev_ts\": 0.034961,\n \"samples_ns\": [ 6467451989, 6455943520, 6444683555 ],\n \"samples_ts\": [ 19.7914, 19.8267, 19.8613 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T04:43:49Z\",\n \"avg_ns\": 29545970203,\n \"stddev_ns\": 33389359,\n \"avg_ts\": 4.332236,\n \"stddev_ts\": 0.004894,\n \"samples_ns\": [ 29582709506, 29517478400, 29537722705 ],\n \"samples_ts\": [ 4.32685, 4.33641, 4.33344 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T04:43:23Z", + "avg_ns": 6456026354, + "stddev_ns": 11385010, + "avg_ts": 19.826478, + "stddev_ts": 0.034961, + "samples_ns": [ + 6467451989, + 6455943520, + 6444683555 + ], + "samples_ts": [ + 19.7914, + 19.8267, + 19.8613 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T04:43:49Z", + "avg_ns": 29545970203, + "stddev_ns": 33389359, + "avg_ts": 4.332236, + "stddev_ts": 0.004894, + "samples_ns": [ + 29582709506, + 29517478400, + 29537722705 + ], + "samples_ts": [ + 4.32685, + 4.33641, + 4.33344 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 980 + }, + { + "timestamp_utc": "2025-12-10T04:51:45.724424+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T04:45:20Z\",\n \"avg_ns\": 6456802416,\n \"stddev_ns\": 22865134,\n \"avg_ts\": 19.824220,\n \"stddev_ts\": 0.070091,\n \"samples_ns\": [ 6449078497, 6438799708, 6482529044 ],\n \"samples_ts\": [ 19.8478, 19.8795, 19.7454 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T04:45:46Z\",\n \"avg_ns\": 119683230976,\n \"stddev_ns\": 91006109,\n \"avg_ts\": 4.277961,\n \"stddev_ts\": 0.003252,\n \"samples_ns\": [ 119606495645, 119783774832, 119659422451 ],\n \"samples_ts\": [ 4.2807, 4.27437, 4.27881 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T04:45:20Z", + "avg_ns": 6456802416, + "stddev_ns": 22865134, + "avg_ts": 19.82422, + "stddev_ts": 0.070091, + "samples_ns": [ + 6449078497, + 6438799708, + 6482529044 + ], + "samples_ts": [ + 19.8478, + 19.8795, + 19.7454 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T04:45:46Z", + "avg_ns": 119683230976, + "stddev_ns": 91006109, + "avg_ts": 4.277961, + "stddev_ts": 0.003252, + "samples_ns": [ + 119606495645, + 119783774832, + 119659422451 + ], + "samples_ts": [ + 4.2807, + 4.27437, + 4.27881 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 981 + }, + { + "timestamp_utc": "2025-12-10T04:55:02.932419+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T04:51:47Z\",\n \"avg_ns\": 26478247927,\n \"stddev_ns\": 11168512,\n \"avg_ts\": 19.336629,\n \"stddev_ts\": 0.008157,\n \"samples_ns\": [ 26483300625, 26465447248, 26485995909 ],\n \"samples_ts\": [ 19.3329, 19.346, 19.331 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T04:53:33Z\",\n \"avg_ns\": 29596097611,\n \"stddev_ns\": 39151305,\n \"avg_ts\": 4.324900,\n \"stddev_ts\": 0.005725,\n \"samples_ns\": [ 29614572258, 29622592362, 29551128215 ],\n \"samples_ts\": [ 4.3222, 4.32103, 4.33148 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T04:51:47Z", + "avg_ns": 26478247927, + "stddev_ns": 11168512, + "avg_ts": 19.336629, + "stddev_ts": 0.008157, + "samples_ns": [ + 26483300625, + 26465447248, + 26485995909 + ], + "samples_ts": [ + 19.3329, + 19.346, + 19.331 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T04:53:33Z", + "avg_ns": 29596097611, + "stddev_ns": 39151305, + "avg_ts": 4.3249, + "stddev_ts": 0.005725, + "samples_ns": [ + 29614572258, + 29622592362, + 29551128215 + ], + "samples_ts": [ + 4.3222, + 4.32103, + 4.33148 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 982 + }, + { + "timestamp_utc": "2025-12-10T05:02:50.597263+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T04:55:05Z\",\n \"avg_ns\": 26477762503,\n \"stddev_ns\": 9769962,\n \"avg_ts\": 19.336983,\n \"stddev_ts\": 0.007134,\n \"samples_ns\": [ 26488838827, 26474078646, 26470370036 ],\n \"samples_ts\": [ 19.3289, 19.3397, 19.3424 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T04:56:51Z\",\n \"avg_ns\": 119746387476,\n \"stddev_ns\": 97153107,\n \"avg_ts\": 4.275705,\n \"stddev_ts\": 0.003471,\n \"samples_ns\": [ 119634204742, 119802435725, 119802521961 ],\n \"samples_ts\": [ 4.27971, 4.2737, 4.2737 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T04:55:05Z", + "avg_ns": 26477762503, + "stddev_ns": 9769962, + "avg_ts": 19.336983, + "stddev_ts": 0.007134, + "samples_ns": [ + 26488838827, + 26474078646, + 26470370036 + ], + "samples_ts": [ + 19.3289, + 19.3397, + 19.3424 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T04:56:51Z", + "avg_ns": 119746387476, + "stddev_ns": 97153107, + "avg_ts": 4.275705, + "stddev_ts": 0.003471, + "samples_ns": [ + 119634204742, + 119802435725, + 119802521961 + ], + "samples_ts": [ + 4.27971, + 4.2737, + 4.2737 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 983 + }, + { + "timestamp_utc": "2025-12-10T05:04:47.994900+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T05:02:52Z\",\n \"avg_ns\": 6458054530,\n \"stddev_ns\": 7759440,\n \"avg_ts\": 19.820230,\n \"stddev_ts\": 0.023798,\n \"samples_ns\": [ 6453374919, 6467011348, 6453777323 ],\n \"samples_ts\": [ 19.8346, 19.7928, 19.8333 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T05:03:18Z\",\n \"avg_ns\": 29695907932,\n \"stddev_ns\": 76054982,\n \"avg_ts\": 4.310377,\n \"stddev_ts\": 0.011055,\n \"samples_ns\": [ 29747824332, 29731292017, 29608607447 ],\n \"samples_ts\": [ 4.30284, 4.30523, 4.32307 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T05:02:52Z", + "avg_ns": 6458054530, + "stddev_ns": 7759440, + "avg_ts": 19.82023, + "stddev_ts": 0.023798, + "samples_ns": [ + 6453374919, + 6467011348, + 6453777323 + ], + "samples_ts": [ + 19.8346, + 19.7928, + 19.8333 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T05:03:18Z", + "avg_ns": 29695907932, + "stddev_ns": 76054982, + "avg_ts": 4.310377, + "stddev_ts": 0.011055, + "samples_ns": [ + 29747824332, + 29731292017, + 29608607447 + ], + "samples_ts": [ + 4.30284, + 4.30523, + 4.32307 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 984 + }, + { + "timestamp_utc": "2025-12-10T05:11:15.843169+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T05:04:50Z\",\n \"avg_ns\": 6445160784,\n \"stddev_ns\": 5489670,\n \"avg_ts\": 19.859871,\n \"stddev_ts\": 0.016908,\n \"samples_ns\": [ 6451432831, 6441229426, 6442820095 ],\n \"samples_ts\": [ 19.8406, 19.872, 19.8671 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T05:05:15Z\",\n \"avg_ns\": 119830584863,\n \"stddev_ns\": 120856508,\n \"avg_ts\": 4.272702,\n \"stddev_ts\": 0.004307,\n \"samples_ns\": [ 119738720286, 119967495008, 119785539295 ],\n \"samples_ts\": [ 4.27598, 4.26782, 4.27431 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T05:04:50Z", + "avg_ns": 6445160784, + "stddev_ns": 5489670, + "avg_ts": 19.859871, + "stddev_ts": 0.016908, + "samples_ns": [ + 6451432831, + 6441229426, + 6442820095 + ], + "samples_ts": [ + 19.8406, + 19.872, + 19.8671 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T05:05:15Z", + "avg_ns": 119830584863, + "stddev_ns": 120856508, + "avg_ts": 4.272702, + "stddev_ts": 0.004307, + "samples_ns": [ + 119738720286, + 119967495008, + 119785539295 + ], + "samples_ts": [ + 4.27598, + 4.26782, + 4.27431 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 985 + }, + { + "timestamp_utc": "2025-12-10T05:14:31.077553+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T05:11:18Z\",\n \"avg_ns\": 25948685408,\n \"stddev_ns\": 28697814,\n \"avg_ts\": 19.731266,\n \"stddev_ts\": 0.021813,\n \"samples_ns\": [ 25980456882, 25924645246, 25940954096 ],\n \"samples_ts\": [ 19.7071, 19.7495, 19.7371 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T05:13:01Z\",\n \"avg_ns\": 29614159816,\n \"stddev_ns\": 39381101,\n \"avg_ts\": 4.322262,\n \"stddev_ts\": 0.005745,\n \"samples_ns\": [ 29605436797, 29657170689, 29579871963 ],\n \"samples_ts\": [ 4.32353, 4.31599, 4.32727 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T05:11:18Z", + "avg_ns": 25948685408, + "stddev_ns": 28697814, + "avg_ts": 19.731266, + "stddev_ts": 0.021813, + "samples_ns": [ + 25980456882, + 25924645246, + 25940954096 + ], + "samples_ts": [ + 19.7071, + 19.7495, + 19.7371 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T05:13:01Z", + "avg_ns": 29614159816, + "stddev_ns": 39381101, + "avg_ts": 4.322262, + "stddev_ts": 0.005745, + "samples_ns": [ + 29605436797, + 29657170689, + 29579871963 + ], + "samples_ts": [ + 4.32353, + 4.31599, + 4.32727 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 986 + }, + { + "timestamp_utc": "2025-12-10T05:22:16.402647+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T05:14:33Z\",\n \"avg_ns\": 25893229778,\n \"stddev_ns\": 6854464,\n \"avg_ts\": 19.773509,\n \"stddev_ts\": 0.005234,\n \"samples_ns\": [ 25900898556, 25887699492, 25891091286 ],\n \"samples_ts\": [ 19.7677, 19.7777, 19.7751 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T05:16:16Z\",\n \"avg_ns\": 119736869633,\n \"stddev_ns\": 135605232,\n \"avg_ts\": 4.276047,\n \"stddev_ts\": 0.004845,\n \"samples_ns\": [ 119586430492, 119774475009, 119849703398 ],\n \"samples_ts\": [ 4.28142, 4.2747, 4.27202 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T05:14:33Z", + "avg_ns": 25893229778, + "stddev_ns": 6854464, + "avg_ts": 19.773509, + "stddev_ts": 0.005234, + "samples_ns": [ + 25900898556, + 25887699492, + 25891091286 + ], + "samples_ts": [ + 19.7677, + 19.7777, + 19.7751 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T05:16:16Z", + "avg_ns": 119736869633, + "stddev_ns": 135605232, + "avg_ts": 4.276047, + "stddev_ts": 0.004845, + "samples_ns": [ + 119586430492, + 119774475009, + 119849703398 + ], + "samples_ts": [ + 4.28142, + 4.2747, + 4.27202 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 987 + }, + { + "timestamp_utc": "2025-12-10T05:24:13.318634+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T05:22:18Z\",\n \"avg_ns\": 6440912978,\n \"stddev_ns\": 2043588,\n \"avg_ts\": 19.872960,\n \"stddev_ts\": 0.006306,\n \"samples_ns\": [ 6441426161, 6438661710, 6442651063 ],\n \"samples_ts\": [ 19.8714, 19.8799, 19.8676 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T05:22:44Z\",\n \"avg_ns\": 29532461821,\n \"stddev_ns\": 16013583,\n \"avg_ts\": 4.334215,\n \"stddev_ts\": 0.002350,\n \"samples_ns\": [ 29516896970, 29531602643, 29548885852 ],\n \"samples_ts\": [ 4.3365, 4.33434, 4.3318 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T05:22:18Z", + "avg_ns": 6440912978, + "stddev_ns": 2043588, + "avg_ts": 19.87296, + "stddev_ts": 0.006306, + "samples_ns": [ + 6441426161, + 6438661710, + 6442651063 + ], + "samples_ts": [ + 19.8714, + 19.8799, + 19.8676 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T05:22:44Z", + "avg_ns": 29532461821, + "stddev_ns": 16013583, + "avg_ts": 4.334215, + "stddev_ts": 0.00235, + "samples_ns": [ + 29516896970, + 29531602643, + 29548885852 + ], + "samples_ts": [ + 4.3365, + 4.33434, + 4.3318 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 988 + }, + { + "timestamp_utc": "2025-12-10T05:30:40.457335+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T05:24:15Z\",\n \"avg_ns\": 6455051568,\n \"stddev_ns\": 19900818,\n \"avg_ts\": 19.829556,\n \"stddev_ts\": 0.061030,\n \"samples_ns\": [ 6445425960, 6477935159, 6441793585 ],\n \"samples_ts\": [ 19.859, 19.7594, 19.8702 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T05:24:41Z\",\n \"avg_ns\": 119612426022,\n \"stddev_ns\": 139355783,\n \"avg_ts\": 4.280496,\n \"stddev_ts\": 0.004988,\n \"samples_ns\": [ 119465004214, 119630278691, 119741995161 ],\n \"samples_ts\": [ 4.28577, 4.27985, 4.27586 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T05:24:15Z", + "avg_ns": 6455051568, + "stddev_ns": 19900818, + "avg_ts": 19.829556, + "stddev_ts": 0.06103, + "samples_ns": [ + 6445425960, + 6477935159, + 6441793585 + ], + "samples_ts": [ + 19.859, + 19.7594, + 19.8702 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T05:24:41Z", + "avg_ns": 119612426022, + "stddev_ns": 139355783, + "avg_ts": 4.280496, + "stddev_ts": 0.004988, + "samples_ns": [ + 119465004214, + 119630278691, + 119741995161 + ], + "samples_ts": [ + 4.28577, + 4.27985, + 4.27586 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 989 + }, + { + "timestamp_utc": "2025-12-10T05:33:56.223763+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T05:30:42Z\",\n \"avg_ns\": 26084080147,\n \"stddev_ns\": 9364730,\n \"avg_ts\": 19.628833,\n \"stddev_ts\": 0.007047,\n \"samples_ns\": [ 26087281579, 26091422877, 26073535986 ],\n \"samples_ts\": [ 19.6264, 19.6233, 19.6368 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T05:32:26Z\",\n \"avg_ns\": 29636156317,\n \"stddev_ns\": 27233993,\n \"avg_ts\": 4.319051,\n \"stddev_ts\": 0.003969,\n \"samples_ns\": [ 29638764355, 29661991926, 29607712671 ],\n \"samples_ts\": [ 4.31867, 4.31529, 4.3232 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T05:30:42Z", + "avg_ns": 26084080147, + "stddev_ns": 9364730, + "avg_ts": 19.628833, + "stddev_ts": 0.007047, + "samples_ns": [ + 26087281579, + 26091422877, + 26073535986 + ], + "samples_ts": [ + 19.6264, + 19.6233, + 19.6368 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T05:32:26Z", + "avg_ns": 29636156317, + "stddev_ns": 27233993, + "avg_ts": 4.319051, + "stddev_ts": 0.003969, + "samples_ns": [ + 29638764355, + 29661991926, + 29607712671 + ], + "samples_ts": [ + 4.31867, + 4.31529, + 4.3232 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 990 + }, + { + "timestamp_utc": "2025-12-10T05:41:41.606192+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T05:33:58Z\",\n \"avg_ns\": 26106828305,\n \"stddev_ns\": 8455082,\n \"avg_ts\": 19.611729,\n \"stddev_ts\": 0.006350,\n \"samples_ns\": [ 26112626522, 26110727853, 26097130542 ],\n \"samples_ts\": [ 19.6074, 19.6088, 19.619 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T05:35:42Z\",\n \"avg_ns\": 119474886999,\n \"stddev_ns\": 184309679,\n \"avg_ts\": 4.285426,\n \"stddev_ts\": 0.006609,\n \"samples_ns\": [ 119449972600, 119670386252, 119304302146 ],\n \"samples_ts\": [ 4.28631, 4.27842, 4.29155 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T05:33:58Z", + "avg_ns": 26106828305, + "stddev_ns": 8455082, + "avg_ts": 19.611729, + "stddev_ts": 0.00635, + "samples_ns": [ + 26112626522, + 26110727853, + 26097130542 + ], + "samples_ts": [ + 19.6074, + 19.6088, + 19.619 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T05:35:42Z", + "avg_ns": 119474886999, + "stddev_ns": 184309679, + "avg_ts": 4.285426, + "stddev_ts": 0.006609, + "samples_ns": [ + 119449972600, + 119670386252, + 119304302146 + ], + "samples_ts": [ + 4.28631, + 4.27842, + 4.29155 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 991 + }, + { + "timestamp_utc": "2025-12-10T05:43:38.681094+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T05:41:43Z\",\n \"avg_ns\": 6455347610,\n \"stddev_ns\": 18175939,\n \"avg_ts\": 19.828626,\n \"stddev_ts\": 0.055753,\n \"samples_ns\": [ 6448408272, 6475970778, 6441663781 ],\n \"samples_ts\": [ 19.8499, 19.7654, 19.8706 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T05:42:09Z\",\n \"avg_ns\": 29589701232,\n \"stddev_ns\": 11510775,\n \"avg_ts\": 4.325830,\n \"stddev_ts\": 0.001682,\n \"samples_ns\": [ 29583952434, 29582197219, 29602954043 ],\n \"samples_ts\": [ 4.32667, 4.32693, 4.32389 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T05:41:43Z", + "avg_ns": 6455347610, + "stddev_ns": 18175939, + "avg_ts": 19.828626, + "stddev_ts": 0.055753, + "samples_ns": [ + 6448408272, + 6475970778, + 6441663781 + ], + "samples_ts": [ + 19.8499, + 19.7654, + 19.8706 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T05:42:09Z", + "avg_ns": 29589701232, + "stddev_ns": 11510775, + "avg_ts": 4.32583, + "stddev_ts": 0.001682, + "samples_ns": [ + 29583952434, + 29582197219, + 29602954043 + ], + "samples_ts": [ + 4.32667, + 4.32693, + 4.32389 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 992 + }, + { + "timestamp_utc": "2025-12-10T05:50:05.076258+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T05:43:40Z\",\n \"avg_ns\": 6449517325,\n \"stddev_ns\": 10868918,\n \"avg_ts\": 19.846484,\n \"stddev_ts\": 0.033417,\n \"samples_ns\": [ 6461882746, 6445191825, 6441477405 ],\n \"samples_ts\": [ 19.8085, 19.8598, 19.8712 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T05:44:06Z\",\n \"avg_ns\": 119361069109,\n \"stddev_ns\": 152594344,\n \"avg_ts\": 4.289510,\n \"stddev_ts\": 0.005480,\n \"samples_ns\": [ 119285765340, 119260764331, 119536677656 ],\n \"samples_ts\": [ 4.29221, 4.29311, 4.2832 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T05:43:40Z", + "avg_ns": 6449517325, + "stddev_ns": 10868918, + "avg_ts": 19.846484, + "stddev_ts": 0.033417, + "samples_ns": [ + 6461882746, + 6445191825, + 6441477405 + ], + "samples_ts": [ + 19.8085, + 19.8598, + 19.8712 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T05:44:06Z", + "avg_ns": 119361069109, + "stddev_ns": 152594344, + "avg_ts": 4.28951, + "stddev_ts": 0.00548, + "samples_ns": [ + 119285765340, + 119260764331, + 119536677656 + ], + "samples_ts": [ + 4.29221, + 4.29311, + 4.2832 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 993 + }, + { + "timestamp_utc": "2025-12-10T05:53:22.132240+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T05:50:07Z\",\n \"avg_ns\": 26453633204,\n \"stddev_ns\": 13126444,\n \"avg_ts\": 19.354622,\n \"stddev_ts\": 0.009605,\n \"samples_ns\": [ 26438509704, 26460352090, 26462037820 ],\n \"samples_ts\": [ 19.3657, 19.3497, 19.3485 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T05:51:53Z\",\n \"avg_ns\": 29585555613,\n \"stddev_ns\": 44070996,\n \"avg_ts\": 4.326442,\n \"stddev_ts\": 0.006446,\n \"samples_ns\": [ 29628247118, 29540224454, 29588195268 ],\n \"samples_ts\": [ 4.3202, 4.33307, 4.32605 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T05:50:07Z", + "avg_ns": 26453633204, + "stddev_ns": 13126444, + "avg_ts": 19.354622, + "stddev_ts": 0.009605, + "samples_ns": [ + 26438509704, + 26460352090, + 26462037820 + ], + "samples_ts": [ + 19.3657, + 19.3497, + 19.3485 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T05:51:53Z", + "avg_ns": 29585555613, + "stddev_ns": 44070996, + "avg_ts": 4.326442, + "stddev_ts": 0.006446, + "samples_ns": [ + 29628247118, + 29540224454, + 29588195268 + ], + "samples_ts": [ + 4.3202, + 4.33307, + 4.32605 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 994 + }, + { + "timestamp_utc": "2025-12-10T06:01:09.719644+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T05:53:24Z\",\n \"avg_ns\": 26441260239,\n \"stddev_ns\": 2790583,\n \"avg_ts\": 19.363676,\n \"stddev_ts\": 0.002037,\n \"samples_ns\": [ 26440675380, 26438818089, 26444287250 ],\n \"samples_ts\": [ 19.3641, 19.3655, 19.3615 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T05:55:10Z\",\n \"avg_ns\": 119754691206,\n \"stddev_ns\": 86051323,\n \"avg_ts\": 4.275408,\n \"stddev_ts\": 0.003071,\n \"samples_ns\": [ 119690435555, 119852456206, 119721181857 ],\n \"samples_ts\": [ 4.2777, 4.27192, 4.2766 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T05:53:24Z", + "avg_ns": 26441260239, + "stddev_ns": 2790583, + "avg_ts": 19.363676, + "stddev_ts": 0.002037, + "samples_ns": [ + 26440675380, + 26438818089, + 26444287250 + ], + "samples_ts": [ + 19.3641, + 19.3655, + 19.3615 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T05:55:10Z", + "avg_ns": 119754691206, + "stddev_ns": 86051323, + "avg_ts": 4.275408, + "stddev_ts": 0.003071, + "samples_ns": [ + 119690435555, + 119852456206, + 119721181857 + ], + "samples_ts": [ + 4.2777, + 4.27192, + 4.2766 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 995 + }, + { + "timestamp_utc": "2025-12-10T06:03:06.902840+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T06:01:11Z\",\n \"avg_ns\": 6439757116,\n \"stddev_ns\": 740079,\n \"avg_ts\": 19.876526,\n \"stddev_ts\": 0.002257,\n \"samples_ns\": [ 6438919401, 6440268239, 6440083710 ],\n \"samples_ts\": [ 19.8791, 19.8749, 19.8755 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T06:01:37Z\",\n \"avg_ns\": 29616112140,\n \"stddev_ns\": 60154704,\n \"avg_ts\": 4.321984,\n \"stddev_ts\": 0.008788,\n \"samples_ns\": [ 29547067665, 29657209624, 29644059131 ],\n \"samples_ts\": [ 4.33207, 4.31598, 4.3179 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T06:01:11Z", + "avg_ns": 6439757116, + "stddev_ns": 740079, + "avg_ts": 19.876526, + "stddev_ts": 0.002257, + "samples_ns": [ + 6438919401, + 6440268239, + 6440083710 + ], + "samples_ts": [ + 19.8791, + 19.8749, + 19.8755 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T06:01:37Z", + "avg_ns": 29616112140, + "stddev_ns": 60154704, + "avg_ts": 4.321984, + "stddev_ts": 0.008788, + "samples_ns": [ + 29547067665, + 29657209624, + 29644059131 + ], + "samples_ts": [ + 4.33207, + 4.31598, + 4.3179 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 996 + }, + { + "timestamp_utc": "2025-12-10T06:09:34.307420+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T06:03:09Z\",\n \"avg_ns\": 6463042623,\n \"stddev_ns\": 10233284,\n \"avg_ts\": 19.804947,\n \"stddev_ts\": 0.031329,\n \"samples_ns\": [ 6474828490, 6457880208, 6456419172 ],\n \"samples_ts\": [ 19.7689, 19.8207, 19.8252 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T06:03:34Z\",\n \"avg_ns\": 119672118897,\n \"stddev_ns\": 20530175,\n \"avg_ts\": 4.278357,\n \"stddev_ts\": 0.000734,\n \"samples_ns\": [ 119650091544, 119690721044, 119675544103 ],\n \"samples_ts\": [ 4.27914, 4.27769, 4.27823 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T06:03:09Z", + "avg_ns": 6463042623, + "stddev_ns": 10233284, + "avg_ts": 19.804947, + "stddev_ts": 0.031329, + "samples_ns": [ + 6474828490, + 6457880208, + 6456419172 + ], + "samples_ts": [ + 19.7689, + 19.8207, + 19.8252 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T06:03:34Z", + "avg_ns": 119672118897, + "stddev_ns": 20530175, + "avg_ts": 4.278357, + "stddev_ts": 0.000734, + "samples_ns": [ + 119650091544, + 119690721044, + 119675544103 + ], + "samples_ts": [ + 4.27914, + 4.27769, + 4.27823 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 997 + }, + { + "timestamp_utc": "2025-12-10T06:12:49.228610+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T06:09:36Z\",\n \"avg_ns\": 25909921644,\n \"stddev_ns\": 2491281,\n \"avg_ts\": 19.760770,\n \"stddev_ts\": 0.001896,\n \"samples_ns\": [ 25909463383, 25912604970, 25907696580 ],\n \"samples_ts\": [ 19.7611, 19.7587, 19.7625 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T06:11:20Z\",\n \"avg_ns\": 29579642830,\n \"stddev_ns\": 3111463,\n \"avg_ts\": 4.327300,\n \"stddev_ts\": 0.000454,\n \"samples_ns\": [ 29577351495, 29578398109, 29583178887 ],\n \"samples_ts\": [ 4.32764, 4.32748, 4.32678 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T06:09:36Z", + "avg_ns": 25909921644, + "stddev_ns": 2491281, + "avg_ts": 19.76077, + "stddev_ts": 0.001896, + "samples_ns": [ + 25909463383, + 25912604970, + 25907696580 + ], + "samples_ts": [ + 19.7611, + 19.7587, + 19.7625 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T06:11:20Z", + "avg_ns": 29579642830, + "stddev_ns": 3111463, + "avg_ts": 4.3273, + "stddev_ts": 0.000454, + "samples_ns": [ + 29577351495, + 29578398109, + 29583178887 + ], + "samples_ts": [ + 4.32764, + 4.32748, + 4.32678 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 998 + }, + { + "timestamp_utc": "2025-12-10T06:20:33.761821+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T06:12:51Z\",\n \"avg_ns\": 25910076842,\n \"stddev_ns\": 7956780,\n \"avg_ts\": 19.760653,\n \"stddev_ts\": 0.006069,\n \"samples_ns\": [ 25917180667, 25901478931, 25911570928 ],\n \"samples_ts\": [ 19.7552, 19.7672, 19.7595 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T06:14:35Z\",\n \"avg_ns\": 119455439677,\n \"stddev_ns\": 190119494,\n \"avg_ts\": 4.286124,\n \"stddev_ts\": 0.006819,\n \"samples_ns\": [ 119422508585, 119283937566, 119659872882 ],\n \"samples_ts\": [ 4.2873, 4.29228, 4.27879 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T06:12:51Z", + "avg_ns": 25910076842, + "stddev_ns": 7956780, + "avg_ts": 19.760653, + "stddev_ts": 0.006069, + "samples_ns": [ + 25917180667, + 25901478931, + 25911570928 + ], + "samples_ts": [ + 19.7552, + 19.7672, + 19.7595 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T06:14:35Z", + "avg_ns": 119455439677, + "stddev_ns": 190119494, + "avg_ts": 4.286124, + "stddev_ts": 0.006819, + "samples_ns": [ + 119422508585, + 119283937566, + 119659872882 + ], + "samples_ts": [ + 4.2873, + 4.29228, + 4.27879 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 999 + }, + { + "timestamp_utc": "2025-12-10T06:22:30.849802+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T06:20:35Z\",\n \"avg_ns\": 6453098549,\n \"stddev_ns\": 15497596,\n \"avg_ts\": 19.835509,\n \"stddev_ts\": 0.047577,\n \"samples_ns\": [ 6470740724, 6441683649, 6446871276 ],\n \"samples_ts\": [ 19.7814, 19.8706, 19.8546 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T06:21:01Z\",\n \"avg_ns\": 29577683987,\n \"stddev_ns\": 29674272,\n \"avg_ts\": 4.327590,\n \"stddev_ts\": 0.004339,\n \"samples_ns\": [ 29559462102, 29611925282, 29561664577 ],\n \"samples_ts\": [ 4.33025, 4.32258, 4.32993 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T06:20:35Z", + "avg_ns": 6453098549, + "stddev_ns": 15497596, + "avg_ts": 19.835509, + "stddev_ts": 0.047577, + "samples_ns": [ + 6470740724, + 6441683649, + 6446871276 + ], + "samples_ts": [ + 19.7814, + 19.8706, + 19.8546 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T06:21:01Z", + "avg_ns": 29577683987, + "stddev_ns": 29674272, + "avg_ts": 4.32759, + "stddev_ts": 0.004339, + "samples_ns": [ + 29559462102, + 29611925282, + 29561664577 + ], + "samples_ts": [ + 4.33025, + 4.32258, + 4.32993 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1000 + }, + { + "timestamp_utc": "2025-12-10T06:28:57.898883+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T06:22:32Z\",\n \"avg_ns\": 6552766887,\n \"stddev_ns\": 186395539,\n \"avg_ts\": 19.544107,\n \"stddev_ts\": 0.547016,\n \"samples_ns\": [ 6438168353, 6452288845, 6767843463 ],\n \"samples_ts\": [ 19.8814, 19.8379, 18.913 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T06:22:59Z\",\n \"avg_ns\": 119469920420,\n \"stddev_ns\": 120868994,\n \"avg_ts\": 4.285600,\n \"stddev_ts\": 0.004333,\n \"samples_ns\": [ 119416069293, 119608355427, 119385336540 ],\n \"samples_ts\": [ 4.28753, 4.28064, 4.28863 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T06:22:32Z", + "avg_ns": 6552766887, + "stddev_ns": 186395539, + "avg_ts": 19.544107, + "stddev_ts": 0.547016, + "samples_ns": [ + 6438168353, + 6452288845, + 6767843463 + ], + "samples_ts": [ + 19.8814, + 19.8379, + 18.913 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T06:22:59Z", + "avg_ns": 119469920420, + "stddev_ns": 120868994, + "avg_ts": 4.2856, + "stddev_ts": 0.004333, + "samples_ns": [ + 119416069293, + 119608355427, + 119385336540 + ], + "samples_ts": [ + 4.28753, + 4.28064, + 4.28863 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1001 + }, + { + "timestamp_utc": "2025-12-10T06:32:13.737286+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T06:29:00Z\",\n \"avg_ns\": 26103084214,\n \"stddev_ns\": 22620029,\n \"avg_ts\": 19.614550,\n \"stddev_ts\": 0.016998,\n \"samples_ns\": [ 26104599664, 26124907258, 26079745722 ],\n \"samples_ts\": [ 19.6134, 19.5982, 19.6321 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T06:30:44Z\",\n \"avg_ns\": 29627558807,\n \"stddev_ns\": 50003175,\n \"avg_ts\": 4.320310,\n \"stddev_ts\": 0.007295,\n \"samples_ns\": [ 29573185022, 29671566406, 29637924993 ],\n \"samples_ts\": [ 4.32825, 4.31389, 4.31879 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T06:29:00Z", + "avg_ns": 26103084214, + "stddev_ns": 22620029, + "avg_ts": 19.61455, + "stddev_ts": 0.016998, + "samples_ns": [ + 26104599664, + 26124907258, + 26079745722 + ], + "samples_ts": [ + 19.6134, + 19.5982, + 19.6321 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T06:30:44Z", + "avg_ns": 29627558807, + "stddev_ns": 50003175, + "avg_ts": 4.32031, + "stddev_ts": 0.007295, + "samples_ns": [ + 29573185022, + 29671566406, + 29637924993 + ], + "samples_ts": [ + 4.32825, + 4.31389, + 4.31879 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1002 + }, + { + "timestamp_utc": "2025-12-10T06:39:59.874441+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T06:32:15Z\",\n \"avg_ns\": 26090830583,\n \"stddev_ns\": 19807813,\n \"avg_ts\": 19.623760,\n \"stddev_ts\": 0.014898,\n \"samples_ns\": [ 26110186112, 26070601993, 26091703646 ],\n \"samples_ts\": [ 19.6092, 19.639, 19.6231 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T06:34:00Z\",\n \"avg_ns\": 119760472752,\n \"stddev_ns\": 265540414,\n \"avg_ts\": 4.275214,\n \"stddev_ts\": 0.009479,\n \"samples_ns\": [ 119761553642, 120025470622, 119494393994 ],\n \"samples_ts\": [ 4.27516, 4.26576, 4.28472 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T06:32:15Z", + "avg_ns": 26090830583, + "stddev_ns": 19807813, + "avg_ts": 19.62376, + "stddev_ts": 0.014898, + "samples_ns": [ + 26110186112, + 26070601993, + 26091703646 + ], + "samples_ts": [ + 19.6092, + 19.639, + 19.6231 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T06:34:00Z", + "avg_ns": 119760472752, + "stddev_ns": 265540414, + "avg_ts": 4.275214, + "stddev_ts": 0.009479, + "samples_ns": [ + 119761553642, + 120025470622, + 119494393994 + ], + "samples_ts": [ + 4.27516, + 4.26576, + 4.28472 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1003 + }, + { + "timestamp_utc": "2025-12-10T06:41:57.059258+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T06:40:02Z\",\n \"avg_ns\": 6452750875,\n \"stddev_ns\": 16422609,\n \"avg_ts\": 19.836587,\n \"stddev_ts\": 0.050410,\n \"samples_ns\": [ 6442823960, 6443722141, 6471706526 ],\n \"samples_ts\": [ 19.8671, 19.8643, 19.7784 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T06:40:27Z\",\n \"avg_ns\": 29612172617,\n \"stddev_ns\": 55953943,\n \"avg_ts\": 4.322557,\n \"stddev_ts\": 0.008177,\n \"samples_ns\": [ 29643447909, 29645495943, 29547574001 ],\n \"samples_ts\": [ 4.31799, 4.31769, 4.332 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T06:40:02Z", + "avg_ns": 6452750875, + "stddev_ns": 16422609, + "avg_ts": 19.836587, + "stddev_ts": 0.05041, + "samples_ns": [ + 6442823960, + 6443722141, + 6471706526 + ], + "samples_ts": [ + 19.8671, + 19.8643, + 19.7784 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T06:40:27Z", + "avg_ns": 29612172617, + "stddev_ns": 55953943, + "avg_ts": 4.322557, + "stddev_ts": 0.008177, + "samples_ns": [ + 29643447909, + 29645495943, + 29547574001 + ], + "samples_ts": [ + 4.31799, + 4.31769, + 4.332 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1004 + }, + { + "timestamp_utc": "2025-12-10T06:48:25.151732+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T06:41:59Z\",\n \"avg_ns\": 6443462655,\n \"stddev_ns\": 10980135,\n \"avg_ts\": 19.865134,\n \"stddev_ts\": 0.033836,\n \"samples_ns\": [ 6433671946, 6455334391, 6441381628 ],\n \"samples_ts\": [ 19.8953, 19.8286, 19.8715 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T06:42:25Z\",\n \"avg_ns\": 119919899574,\n \"stddev_ns\": 253759480,\n \"avg_ts\": 4.269529,\n \"stddev_ts\": 0.009025,\n \"samples_ns\": [ 119812749794, 119737290071, 120209658857 ],\n \"samples_ts\": [ 4.27333, 4.27603, 4.25923 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T06:41:59Z", + "avg_ns": 6443462655, + "stddev_ns": 10980135, + "avg_ts": 19.865134, + "stddev_ts": 0.033836, + "samples_ns": [ + 6433671946, + 6455334391, + 6441381628 + ], + "samples_ts": [ + 19.8953, + 19.8286, + 19.8715 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T06:42:25Z", + "avg_ns": 119919899574, + "stddev_ns": 253759480, + "avg_ts": 4.269529, + "stddev_ts": 0.009025, + "samples_ns": [ + 119812749794, + 119737290071, + 120209658857 + ], + "samples_ts": [ + 4.27333, + 4.27603, + 4.25923 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1005 + }, + { + "timestamp_utc": "2025-12-10T06:51:42.531034+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T06:48:27Z\",\n \"avg_ns\": 26465184244,\n \"stddev_ns\": 9458368,\n \"avg_ts\": 19.346173,\n \"stddev_ts\": 0.006913,\n \"samples_ns\": [ 26475696797, 26462492223, 26457363712 ],\n \"samples_ts\": [ 19.3385, 19.3481, 19.3519 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T06:50:13Z\",\n \"avg_ns\": 29653063067,\n \"stddev_ns\": 90688243,\n \"avg_ts\": 4.316613,\n \"stddev_ts\": 0.013179,\n \"samples_ns\": [ 29593035786, 29757386008, 29608767408 ],\n \"samples_ts\": [ 4.32534, 4.30145, 4.32304 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T06:48:27Z", + "avg_ns": 26465184244, + "stddev_ns": 9458368, + "avg_ts": 19.346173, + "stddev_ts": 0.006913, + "samples_ns": [ + 26475696797, + 26462492223, + 26457363712 + ], + "samples_ts": [ + 19.3385, + 19.3481, + 19.3519 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T06:50:13Z", + "avg_ns": 29653063067, + "stddev_ns": 90688243, + "avg_ts": 4.316613, + "stddev_ts": 0.013179, + "samples_ns": [ + 29593035786, + 29757386008, + 29608767408 + ], + "samples_ts": [ + 4.32534, + 4.30145, + 4.32304 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1006 + }, + { + "timestamp_utc": "2025-12-10T06:59:29.907087+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T06:51:44Z\",\n \"avg_ns\": 26466486527,\n \"stddev_ns\": 9883760,\n \"avg_ts\": 19.345222,\n \"stddev_ts\": 0.007222,\n \"samples_ns\": [ 26457214580, 26465364114, 26476880889 ],\n \"samples_ts\": [ 19.352, 19.346, 19.3376 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf\",\n \"model_type\": \"gemma3 4B Q4_K - Medium\",\n \"model_size\": 2483352832,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T06:53:30Z\",\n \"avg_ns\": 119654388823,\n \"stddev_ns\": 242915973,\n \"avg_ts\": 4.279002,\n \"stddev_ts\": 0.008679,\n \"samples_ns\": [ 119929263891, 119468569234, 119565333345 ],\n \"samples_ts\": [ 4.26918, 4.28565, 4.28218 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T06:51:44Z", + "avg_ns": 26466486527, + "stddev_ns": 9883760, + "avg_ts": 19.345222, + "stddev_ts": 0.007222, + "samples_ns": [ + 26457214580, + 26465364114, + 26476880889 + ], + "samples_ts": [ + 19.352, + 19.346, + 19.3376 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_type": "gemma3 4B Q4_K - Medium", + "model_size": 2483352832, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T06:53:30Z", + "avg_ns": 119654388823, + "stddev_ns": 242915973, + "avg_ts": 4.279002, + "stddev_ts": 0.008679, + "samples_ns": [ + 119929263891, + 119468569234, + 119565333345 + ], + "samples_ts": [ + 4.26918, + 4.28565, + 4.28218 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q4_K_M.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q4_K_M", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1007 + }, + { + "timestamp_utc": "2025-12-10T07:04:52.678948+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T07:00:27Z\",\n \"avg_ns\": 28354921650,\n \"stddev_ns\": 20104684,\n \"avg_ts\": 4.514209,\n \"stddev_ts\": 0.003200,\n \"samples_ns\": [ 28377742727, 28347194703, 28339827521 ],\n \"samples_ts\": [ 4.51058, 4.51544, 4.51661 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T07:02:27Z\",\n \"avg_ns\": 48323447678,\n \"stddev_ns\": 15676503,\n \"avg_ts\": 2.648818,\n \"stddev_ts\": 0.000859,\n \"samples_ns\": [ 48324053948, 48338810710, 48307478377 ],\n \"samples_ts\": [ 2.64878, 2.64798, 2.64969 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T07:00:27Z", + "avg_ns": 28354921650, + "stddev_ns": 20104684, + "avg_ts": 4.514209, + "stddev_ts": 0.0032, + "samples_ns": [ + 28377742727, + 28347194703, + 28339827521 + ], + "samples_ts": [ + 4.51058, + 4.51544, + 4.51661 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T07:02:27Z", + "avg_ns": 48323447678, + "stddev_ns": 15676503, + "avg_ts": 2.648818, + "stddev_ts": 0.000859, + "samples_ns": [ + 48324053948, + 48338810710, + 48307478377 + ], + "samples_ts": [ + 2.64878, + 2.64798, + 2.64969 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1008 + }, + { + "timestamp_utc": "2025-12-10T07:16:31.690798+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T07:04:53Z\",\n \"avg_ns\": 28288675998,\n \"stddev_ns\": 890264,\n \"avg_ts\": 4.524779,\n \"stddev_ts\": 0.000137,\n \"samples_ns\": [ 28289559138, 28287845788, 28288623070 ],\n \"samples_ts\": [ 4.52464, 4.52491, 4.52479 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T07:06:47Z\",\n \"avg_ns\": 194716588535,\n \"stddev_ns\": 17888675,\n \"avg_ts\": 2.629463,\n \"stddev_ts\": 0.000241,\n \"samples_ns\": [ 194737229674, 194706537831, 194705998102 ],\n \"samples_ts\": [ 2.62918, 2.6296, 2.62961 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T07:04:53Z", + "avg_ns": 28288675998, + "stddev_ns": 890264, + "avg_ts": 4.524779, + "stddev_ts": 0.000137, + "samples_ns": [ + 28289559138, + 28287845788, + 28288623070 + ], + "samples_ts": [ + 4.52464, + 4.52491, + 4.52479 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T07:06:47Z", + "avg_ns": 194716588535, + "stddev_ns": 17888675, + "avg_ts": 2.629463, + "stddev_ts": 0.000241, + "samples_ns": [ + 194737229674, + 194706537831, + 194705998102 + ], + "samples_ts": [ + 2.62918, + 2.6296, + 2.62961 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1009 + }, + { + "timestamp_utc": "2025-12-10T07:26:31.070568+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T07:16:32Z\",\n \"avg_ns\": 113862299153,\n \"stddev_ns\": 2035882,\n \"avg_ts\": 4.496660,\n \"stddev_ts\": 0.000079,\n \"samples_ns\": [ 113863827744, 113863044332, 113860025384 ],\n \"samples_ts\": [ 4.4966, 4.49663, 4.49675 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T07:24:08Z\",\n \"avg_ns\": 47432570678,\n \"stddev_ns\": 18083628,\n \"avg_ts\": 2.698568,\n \"stddev_ts\": 0.001029,\n \"samples_ns\": [ 47452427271, 47428237414, 47417047349 ],\n \"samples_ts\": [ 2.69744, 2.69881, 2.69945 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T07:16:32Z", + "avg_ns": 113862299153, + "stddev_ns": 2035882, + "avg_ts": 4.49666, + "stddev_ts": 7.9e-05, + "samples_ns": [ + 113863827744, + 113863044332, + 113860025384 + ], + "samples_ts": [ + 4.4966, + 4.49663, + 4.49675 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T07:24:08Z", + "avg_ns": 47432570678, + "stddev_ns": 18083628, + "avg_ts": 2.698568, + "stddev_ts": 0.001029, + "samples_ns": [ + 47452427271, + 47428237414, + 47417047349 + ], + "samples_ts": [ + 2.69744, + 2.69881, + 2.69945 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1010 + }, + { + "timestamp_utc": "2025-12-10T07:43:48.234139+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T07:26:32Z\",\n \"avg_ns\": 113896241818,\n \"stddev_ns\": 2372456,\n \"avg_ts\": 4.495320,\n \"stddev_ts\": 0.000093,\n \"samples_ns\": [ 113898710986, 113894036635, 113895977834 ],\n \"samples_ts\": [ 4.49522, 4.49541, 4.49533 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T07:34:07Z\",\n \"avg_ns\": 193320552066,\n \"stddev_ns\": 5959073,\n \"avg_ts\": 2.648451,\n \"stddev_ts\": 0.000082,\n \"samples_ns\": [ 193327340328, 193318132706, 193316183164 ],\n \"samples_ts\": [ 2.64836, 2.64848, 2.64851 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T07:26:32Z", + "avg_ns": 113896241818, + "stddev_ns": 2372456, + "avg_ts": 4.49532, + "stddev_ts": 9.3e-05, + "samples_ns": [ + 113898710986, + 113894036635, + 113895977834 + ], + "samples_ts": [ + 4.49522, + 4.49541, + 4.49533 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T07:34:07Z", + "avg_ns": 193320552066, + "stddev_ns": 5959073, + "avg_ts": 2.648451, + "stddev_ts": 8.2e-05, + "samples_ns": [ + 193327340328, + 193318132706, + 193316183164 + ], + "samples_ts": [ + 2.64836, + 2.64848, + 2.64851 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1011 + }, + { + "timestamp_utc": "2025-12-10T07:48:07.906449+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T07:43:49Z\",\n \"avg_ns\": 28285784344,\n \"stddev_ns\": 289772,\n \"avg_ts\": 4.525241,\n \"stddev_ts\": 0.000026,\n \"samples_ns\": [ 28285830675, 28285921762, 28285600597 ],\n \"samples_ts\": [ 4.52523, 4.52522, 4.52527 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T07:45:42Z\",\n \"avg_ns\": 48294141795,\n \"stddev_ns\": 5758460,\n \"avg_ts\": 2.650425,\n \"stddev_ts\": 0.000316,\n \"samples_ns\": [ 48300770368, 48290429901, 48291225117 ],\n \"samples_ts\": [ 2.65006, 2.65063, 2.65059 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T07:43:49Z", + "avg_ns": 28285784344, + "stddev_ns": 289772, + "avg_ts": 4.525241, + "stddev_ts": 2.6e-05, + "samples_ns": [ + 28285830675, + 28285921762, + 28285600597 + ], + "samples_ts": [ + 4.52523, + 4.52522, + 4.52527 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T07:45:42Z", + "avg_ns": 48294141795, + "stddev_ns": 5758460, + "avg_ts": 2.650425, + "stddev_ts": 0.000316, + "samples_ns": [ + 48300770368, + 48290429901, + 48291225117 + ], + "samples_ts": [ + 2.65006, + 2.65063, + 2.65059 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1012 + }, + { + "timestamp_utc": "2025-12-10T07:59:40.402349+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T07:48:08Z\",\n \"avg_ns\": 28306082787,\n \"stddev_ns\": 913077,\n \"avg_ts\": 4.521996,\n \"stddev_ts\": 0.000141,\n \"samples_ns\": [ 28305941258, 28307026522, 28305280583 ],\n \"samples_ts\": [ 4.52202, 4.52185, 4.52212 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T07:50:02Z\",\n \"avg_ns\": 192554860831,\n \"stddev_ns\": 1938616,\n \"avg_ts\": 2.658982,\n \"stddev_ts\": 0.000027,\n \"samples_ns\": [ 192552643629, 192556236352, 192555702512 ],\n \"samples_ts\": [ 2.65901, 2.65896, 2.65897 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T07:48:08Z", + "avg_ns": 28306082787, + "stddev_ns": 913077, + "avg_ts": 4.521996, + "stddev_ts": 0.000141, + "samples_ns": [ + 28305941258, + 28307026522, + 28305280583 + ], + "samples_ts": [ + 4.52202, + 4.52185, + 4.52212 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T07:50:02Z", + "avg_ns": 192554860831, + "stddev_ns": 1938616, + "avg_ts": 2.658982, + "stddev_ts": 2.7e-05, + "samples_ns": [ + 192552643629, + 192556236352, + 192555702512 + ], + "samples_ts": [ + 2.65901, + 2.65896, + 2.65897 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1013 + }, + { + "timestamp_utc": "2025-12-10T08:09:41.151995+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T07:59:41Z\",\n \"avg_ns\": 114317990189,\n \"stddev_ns\": 1221122,\n \"avg_ts\": 4.478735,\n \"stddev_ts\": 0.000044,\n \"samples_ns\": [ 114319280153, 114317224788, 114317465628 ],\n \"samples_ts\": [ 4.47868, 4.47877, 4.47876 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T08:07:18Z\",\n \"avg_ns\": 47273823084,\n \"stddev_ns\": 10297396,\n \"avg_ts\": 2.707630,\n \"stddev_ts\": 0.000589,\n \"samples_ns\": [ 47285700223, 47268261374, 47267507657 ],\n \"samples_ts\": [ 2.70695, 2.70795, 2.70799 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T07:59:41Z", + "avg_ns": 114317990189, + "stddev_ns": 1221122, + "avg_ts": 4.478735, + "stddev_ts": 4.4e-05, + "samples_ns": [ + 114319280153, + 114317224788, + 114317465628 + ], + "samples_ts": [ + 4.47868, + 4.47877, + 4.47876 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T08:07:18Z", + "avg_ns": 47273823084, + "stddev_ns": 10297396, + "avg_ts": 2.70763, + "stddev_ts": 0.000589, + "samples_ns": [ + 47285700223, + 47268261374, + 47267507657 + ], + "samples_ts": [ + 2.70695, + 2.70795, + 2.70799 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1014 + }, + { + "timestamp_utc": "2025-12-10T08:26:56.582489+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T08:09:42Z\",\n \"avg_ns\": 114313171466,\n \"stddev_ns\": 2129645,\n \"avg_ts\": 4.478924,\n \"stddev_ts\": 0.000081,\n \"samples_ns\": [ 114311180842, 114315322124, 114313011434 ],\n \"samples_ts\": [ 4.479, 4.47884, 4.47893 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T08:17:19Z\",\n \"avg_ns\": 192186723699,\n \"stddev_ns\": 4645096,\n \"avg_ts\": 2.664076,\n \"stddev_ts\": 0.000064,\n \"samples_ns\": [ 192187279703, 192181867413, 192191023983 ],\n \"samples_ts\": [ 2.66407, 2.66414, 2.66402 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T08:09:42Z", + "avg_ns": 114313171466, + "stddev_ns": 2129645, + "avg_ts": 4.478924, + "stddev_ts": 8.1e-05, + "samples_ns": [ + 114311180842, + 114315322124, + 114313011434 + ], + "samples_ts": [ + 4.479, + 4.47884, + 4.47893 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T08:17:19Z", + "avg_ns": 192186723699, + "stddev_ns": 4645096, + "avg_ts": 2.664076, + "stddev_ts": 6.4e-05, + "samples_ns": [ + 192187279703, + 192181867413, + 192191023983 + ], + "samples_ts": [ + 2.66407, + 2.66414, + 2.66402 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1015 + }, + { + "timestamp_utc": "2025-12-10T08:31:15.998691+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T08:26:57Z\",\n \"avg_ns\": 28314092830,\n \"stddev_ns\": 722905,\n \"avg_ts\": 4.520717,\n \"stddev_ts\": 0.000115,\n \"samples_ns\": [ 28313591396, 28313765607, 28314921487 ],\n \"samples_ts\": [ 4.5208, 4.52077, 4.52058 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T08:28:50Z\",\n \"avg_ns\": 48186277713,\n \"stddev_ns\": 3827496,\n \"avg_ts\": 2.656358,\n \"stddev_ts\": 0.000210,\n \"samples_ns\": [ 48190654106, 48184523973, 48183655062 ],\n \"samples_ts\": [ 2.65612, 2.65645, 2.6565 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T08:26:57Z", + "avg_ns": 28314092830, + "stddev_ns": 722905, + "avg_ts": 4.520717, + "stddev_ts": 0.000115, + "samples_ns": [ + 28313591396, + 28313765607, + 28314921487 + ], + "samples_ts": [ + 4.5208, + 4.52077, + 4.52058 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T08:28:50Z", + "avg_ns": 48186277713, + "stddev_ns": 3827496, + "avg_ts": 2.656358, + "stddev_ts": 0.00021, + "samples_ns": [ + 48190654106, + 48184523973, + 48183655062 + ], + "samples_ts": [ + 2.65612, + 2.65645, + 2.6565 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1016 + }, + { + "timestamp_utc": "2025-12-10T08:42:50.638153+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T08:31:17Z\",\n \"avg_ns\": 28312856345,\n \"stddev_ns\": 1248399,\n \"avg_ts\": 4.520914,\n \"stddev_ts\": 0.000199,\n \"samples_ns\": [ 28312958757, 28314050384, 28311559894 ],\n \"samples_ts\": [ 4.5209, 4.52072, 4.52112 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T08:33:10Z\",\n \"avg_ns\": 193261954999,\n \"stddev_ns\": 3167436,\n \"avg_ts\": 2.649254,\n \"stddev_ts\": 0.000043,\n \"samples_ns\": [ 193263481316, 193264070281, 193258313400 ],\n \"samples_ts\": [ 2.64923, 2.64922, 2.6493 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T08:31:17Z", + "avg_ns": 28312856345, + "stddev_ns": 1248399, + "avg_ts": 4.520914, + "stddev_ts": 0.000199, + "samples_ns": [ + 28312958757, + 28314050384, + 28311559894 + ], + "samples_ts": [ + 4.5209, + 4.52072, + 4.52112 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T08:33:10Z", + "avg_ns": 193261954999, + "stddev_ns": 3167436, + "avg_ts": 2.649254, + "stddev_ts": 4.3e-05, + "samples_ns": [ + 193263481316, + 193264070281, + 193258313400 + ], + "samples_ts": [ + 2.64923, + 2.64922, + 2.6493 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1017 + }, + { + "timestamp_utc": "2025-12-10T08:53:01.982055+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T08:42:51Z\",\n \"avg_ns\": 116653152357,\n \"stddev_ns\": 1811918,\n \"avg_ts\": 4.389080,\n \"stddev_ts\": 0.000068,\n \"samples_ns\": [ 116651072916, 116654392056, 116653992099 ],\n \"samples_ts\": [ 4.38916, 4.38903, 4.38905 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T08:50:38Z\",\n \"avg_ns\": 47702418967,\n \"stddev_ns\": 11057097,\n \"avg_ts\": 2.683302,\n \"stddev_ts\": 0.000622,\n \"samples_ns\": [ 47714266780, 47700615909, 47692374212 ],\n \"samples_ts\": [ 2.68264, 2.6834, 2.68387 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T08:42:51Z", + "avg_ns": 116653152357, + "stddev_ns": 1811918, + "avg_ts": 4.38908, + "stddev_ts": 6.8e-05, + "samples_ns": [ + 116651072916, + 116654392056, + 116653992099 + ], + "samples_ts": [ + 4.38916, + 4.38903, + 4.38905 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T08:50:38Z", + "avg_ns": 47702418967, + "stddev_ns": 11057097, + "avg_ts": 2.683302, + "stddev_ts": 0.000622, + "samples_ns": [ + 47714266780, + 47700615909, + 47692374212 + ], + "samples_ts": [ + 2.68264, + 2.6834, + 2.68387 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1018 + }, + { + "timestamp_utc": "2025-12-10T09:10:26.970063+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T08:53:03Z\",\n \"avg_ns\": 116561920921,\n \"stddev_ns\": 10859049,\n \"avg_ts\": 4.392515,\n \"stddev_ts\": 0.000409,\n \"samples_ns\": [ 116549546406, 116569793220, 116566423139 ],\n \"samples_ts\": [ 4.39298, 4.39222, 4.39235 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T09:00:49Z\",\n \"avg_ns\": 192374335005,\n \"stddev_ns\": 12515998,\n \"avg_ts\": 2.661478,\n \"stddev_ts\": 0.000173,\n \"samples_ns\": [ 192388763289, 192366549613, 192367692114 ],\n \"samples_ts\": [ 2.66128, 2.66159, 2.66157 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T08:53:03Z", + "avg_ns": 116561920921, + "stddev_ns": 10859049, + "avg_ts": 4.392515, + "stddev_ts": 0.000409, + "samples_ns": [ + 116549546406, + 116569793220, + 116566423139 + ], + "samples_ts": [ + 4.39298, + 4.39222, + 4.39235 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T09:00:49Z", + "avg_ns": 192374335005, + "stddev_ns": 12515998, + "avg_ts": 2.661478, + "stddev_ts": 0.000173, + "samples_ns": [ + 192388763289, + 192366549613, + 192367692114 + ], + "samples_ts": [ + 2.66128, + 2.66159, + 2.66157 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1019 + }, + { + "timestamp_utc": "2025-12-10T09:14:46.193916+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T09:10:28Z\",\n \"avg_ns\": 28287051715,\n \"stddev_ns\": 806536,\n \"avg_ts\": 4.525039,\n \"stddev_ts\": 0.000129,\n \"samples_ns\": [ 28287644338, 28286133230, 28287377577 ],\n \"samples_ts\": [ 4.52494, 4.52519, 4.52499 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T09:12:21Z\",\n \"avg_ns\": 48151180555,\n \"stddev_ns\": 1298045,\n \"avg_ts\": 2.658294,\n \"stddev_ts\": 0.000070,\n \"samples_ns\": [ 48150270499, 48152619183, 48150651985 ],\n \"samples_ts\": [ 2.65834, 2.65821, 2.65832 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T09:10:28Z", + "avg_ns": 28287051715, + "stddev_ns": 806536, + "avg_ts": 4.525039, + "stddev_ts": 0.000129, + "samples_ns": [ + 28287644338, + 28286133230, + 28287377577 + ], + "samples_ts": [ + 4.52494, + 4.52519, + 4.52499 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T09:12:21Z", + "avg_ns": 48151180555, + "stddev_ns": 1298045, + "avg_ts": 2.658294, + "stddev_ts": 7e-05, + "samples_ns": [ + 48150270499, + 48152619183, + 48150651985 + ], + "samples_ts": [ + 2.65834, + 2.65821, + 2.65832 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1020 + }, + { + "timestamp_utc": "2025-12-10T09:26:22.042436+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T09:14:47Z\",\n \"avg_ns\": 28310224744,\n \"stddev_ns\": 3070694,\n \"avg_ts\": 4.521335,\n \"stddev_ts\": 0.000490,\n \"samples_ns\": [ 28308574510, 28308332011, 28313767711 ],\n \"samples_ts\": [ 4.5216, 4.52164, 4.52077 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T09:16:40Z\",\n \"avg_ns\": 193642723186,\n \"stddev_ns\": 11150906,\n \"avg_ts\": 2.644045,\n \"stddev_ts\": 0.000152,\n \"samples_ns\": [ 193633193501, 193640002183, 193654973875 ],\n \"samples_ts\": [ 2.64417, 2.64408, 2.64388 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T09:14:47Z", + "avg_ns": 28310224744, + "stddev_ns": 3070694, + "avg_ts": 4.521335, + "stddev_ts": 0.00049, + "samples_ns": [ + 28308574510, + 28308332011, + 28313767711 + ], + "samples_ts": [ + 4.5216, + 4.52164, + 4.52077 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T09:16:40Z", + "avg_ns": 193642723186, + "stddev_ns": 11150906, + "avg_ts": 2.644045, + "stddev_ts": 0.000152, + "samples_ns": [ + 193633193501, + 193640002183, + 193654973875 + ], + "samples_ts": [ + 2.64417, + 2.64408, + 2.64388 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1021 + }, + { + "timestamp_utc": "2025-12-10T09:36:23.854063+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T09:26:23Z\",\n \"avg_ns\": 113823717532,\n \"stddev_ns\": 1546363,\n \"avg_ts\": 4.498184,\n \"stddev_ts\": 0.000060,\n \"samples_ns\": [ 113822005992, 113824856832, 113824289773 ],\n \"samples_ts\": [ 4.49825, 4.49814, 4.49816 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T09:33:58Z\",\n \"avg_ns\": 48298799822,\n \"stddev_ns\": 1581608,\n \"avg_ts\": 2.650169,\n \"stddev_ts\": 0.000085,\n \"samples_ns\": [ 48300582254, 48297759977, 48298057237 ],\n \"samples_ts\": [ 2.65007, 2.65023, 2.65021 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T09:26:23Z", + "avg_ns": 113823717532, + "stddev_ns": 1546363, + "avg_ts": 4.498184, + "stddev_ts": 6e-05, + "samples_ns": [ + 113822005992, + 113824856832, + 113824289773 + ], + "samples_ts": [ + 4.49825, + 4.49814, + 4.49816 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T09:33:58Z", + "avg_ns": 48298799822, + "stddev_ns": 1581608, + "avg_ts": 2.650169, + "stddev_ts": 8.5e-05, + "samples_ns": [ + 48300582254, + 48297759977, + 48298057237 + ], + "samples_ts": [ + 2.65007, + 2.65023, + 2.65021 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1022 + }, + { + "timestamp_utc": "2025-12-10T09:53:46.389431+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T09:36:24Z\",\n \"avg_ns\": 113773208891,\n \"stddev_ns\": 5133625,\n \"avg_ts\": 4.500181,\n \"stddev_ts\": 0.000203,\n \"samples_ns\": [ 113778934302, 113769016199, 113771676172 ],\n \"samples_ts\": [ 4.49995, 4.50035, 4.50024 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T09:44:00Z\",\n \"avg_ns\": 195266568804,\n \"stddev_ns\": 13490958,\n \"avg_ts\": 2.622057,\n \"stddev_ts\": 0.000181,\n \"samples_ns\": [ 195259438845, 195258146981, 195282120587 ],\n \"samples_ts\": [ 2.62215, 2.62217, 2.62185 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T09:36:24Z", + "avg_ns": 113773208891, + "stddev_ns": 5133625, + "avg_ts": 4.500181, + "stddev_ts": 0.000203, + "samples_ns": [ + 113778934302, + 113769016199, + 113771676172 + ], + "samples_ts": [ + 4.49995, + 4.50035, + 4.50024 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T09:44:00Z", + "avg_ns": 195266568804, + "stddev_ns": 13490958, + "avg_ts": 2.622057, + "stddev_ts": 0.000181, + "samples_ns": [ + 195259438845, + 195258146981, + 195282120587 + ], + "samples_ts": [ + 2.62215, + 2.62217, + 2.62185 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1023 + }, + { + "timestamp_utc": "2025-12-10T09:58:05.639569+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T09:53:47Z\",\n \"avg_ns\": 28287918358,\n \"stddev_ns\": 1632037,\n \"avg_ts\": 4.524900,\n \"stddev_ts\": 0.000261,\n \"samples_ns\": [ 28289760293, 28287342358, 28286652423 ],\n \"samples_ts\": [ 4.52461, 4.52499, 4.5251 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T09:55:40Z\",\n \"avg_ns\": 48150041697,\n \"stddev_ns\": 3441524,\n \"avg_ts\": 2.658357,\n \"stddev_ts\": 0.000189,\n \"samples_ns\": [ 48153993527, 48148253065, 48147878501 ],\n \"samples_ts\": [ 2.65814, 2.65846, 2.65848 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T09:53:47Z", + "avg_ns": 28287918358, + "stddev_ns": 1632037, + "avg_ts": 4.5249, + "stddev_ts": 0.000261, + "samples_ns": [ + 28289760293, + 28287342358, + 28286652423 + ], + "samples_ts": [ + 4.52461, + 4.52499, + 4.5251 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T09:55:40Z", + "avg_ns": 48150041697, + "stddev_ns": 3441524, + "avg_ts": 2.658357, + "stddev_ts": 0.000189, + "samples_ns": [ + 48153993527, + 48148253065, + 48147878501 + ], + "samples_ts": [ + 2.65814, + 2.65846, + 2.65848 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1024 + }, + { + "timestamp_utc": "2025-12-10T10:09:40.291890+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T09:58:06Z\",\n \"avg_ns\": 28291193667,\n \"stddev_ns\": 3171800,\n \"avg_ts\": 4.524376,\n \"stddev_ts\": 0.000506,\n \"samples_ns\": [ 28288968876, 28294814342, 28289797785 ],\n \"samples_ts\": [ 4.52473, 4.5238, 4.5246 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T09:59:59Z\",\n \"avg_ns\": 193292733446,\n \"stddev_ns\": 4148573,\n \"avg_ts\": 2.648832,\n \"stddev_ts\": 0.000056,\n \"samples_ns\": [ 193297454681, 193290046428, 193290699231 ],\n \"samples_ts\": [ 2.64877, 2.64887, 2.64886 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T09:58:06Z", + "avg_ns": 28291193667, + "stddev_ns": 3171800, + "avg_ts": 4.524376, + "stddev_ts": 0.000506, + "samples_ns": [ + 28288968876, + 28294814342, + 28289797785 + ], + "samples_ts": [ + 4.52473, + 4.5238, + 4.5246 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T09:59:59Z", + "avg_ns": 193292733446, + "stddev_ns": 4148573, + "avg_ts": 2.648832, + "stddev_ts": 5.6e-05, + "samples_ns": [ + 193297454681, + 193290046428, + 193290699231 + ], + "samples_ts": [ + 2.64877, + 2.64887, + 2.64886 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1025 + }, + { + "timestamp_utc": "2025-12-10T10:19:42.052741+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T10:09:41Z\",\n \"avg_ns\": 114541715605,\n \"stddev_ns\": 1425789,\n \"avg_ts\": 4.469987,\n \"stddev_ts\": 0.000052,\n \"samples_ns\": [ 114540897418, 114543265629, 114540983770 ],\n \"samples_ts\": [ 4.47002, 4.46993, 4.47002 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T10:17:19Z\",\n \"avg_ns\": 47334401021,\n \"stddev_ns\": 6544553,\n \"avg_ts\": 2.704164,\n \"stddev_ts\": 0.000373,\n \"samples_ns\": [ 47341534351, 47328695848, 47332972866 ],\n \"samples_ts\": [ 2.70376, 2.70449, 2.70425 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T10:09:41Z", + "avg_ns": 114541715605, + "stddev_ns": 1425789, + "avg_ts": 4.469987, + "stddev_ts": 5.2e-05, + "samples_ns": [ + 114540897418, + 114543265629, + 114540983770 + ], + "samples_ts": [ + 4.47002, + 4.46993, + 4.47002 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T10:17:19Z", + "avg_ns": 47334401021, + "stddev_ns": 6544553, + "avg_ts": 2.704164, + "stddev_ts": 0.000373, + "samples_ns": [ + 47341534351, + 47328695848, + 47332972866 + ], + "samples_ts": [ + 2.70376, + 2.70449, + 2.70425 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1026 + }, + { + "timestamp_utc": "2025-12-10T10:36:55.931333+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T10:19:43Z\",\n \"avg_ns\": 114431303235,\n \"stddev_ns\": 3156704,\n \"avg_ts\": 4.474300,\n \"stddev_ts\": 0.000122,\n \"samples_ns\": [ 114433595429, 114432564483, 114427749795 ],\n \"samples_ts\": [ 4.47421, 4.47425, 4.47444 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T10:27:20Z\",\n \"avg_ns\": 191508751758,\n \"stddev_ns\": 5315694,\n \"avg_ts\": 2.673507,\n \"stddev_ts\": 0.000074,\n \"samples_ns\": [ 191511032914, 191502715179, 191512507183 ],\n \"samples_ts\": [ 2.67348, 2.67359, 2.67345 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T10:19:43Z", + "avg_ns": 114431303235, + "stddev_ns": 3156704, + "avg_ts": 4.4743, + "stddev_ts": 0.000122, + "samples_ns": [ + 114433595429, + 114432564483, + 114427749795 + ], + "samples_ts": [ + 4.47421, + 4.47425, + 4.47444 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T10:27:20Z", + "avg_ns": 191508751758, + "stddev_ns": 5315694, + "avg_ts": 2.673507, + "stddev_ts": 7.4e-05, + "samples_ns": [ + 191511032914, + 191502715179, + 191512507183 + ], + "samples_ts": [ + 2.67348, + 2.67359, + 2.67345 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1027 + }, + { + "timestamp_utc": "2025-12-10T10:41:15.086833+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T10:36:57Z\",\n \"avg_ns\": 28288246419,\n \"stddev_ns\": 2360818,\n \"avg_ts\": 4.524847,\n \"stddev_ts\": 0.000378,\n \"samples_ns\": [ 28290759655, 28287904239, 28286075363 ],\n \"samples_ts\": [ 4.52445, 4.5249, 4.52519 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T10:38:50Z\",\n \"avg_ns\": 48124612925,\n \"stddev_ns\": 909863,\n \"avg_ts\": 2.659762,\n \"stddev_ts\": 0.000050,\n \"samples_ns\": [ 48123616323, 48125399218, 48124823234 ],\n \"samples_ts\": [ 2.65982, 2.65972, 2.65975 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T10:36:57Z", + "avg_ns": 28288246419, + "stddev_ns": 2360818, + "avg_ts": 4.524847, + "stddev_ts": 0.000378, + "samples_ns": [ + 28290759655, + 28287904239, + 28286075363 + ], + "samples_ts": [ + 4.52445, + 4.5249, + 4.52519 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T10:38:50Z", + "avg_ns": 48124612925, + "stddev_ns": 909863, + "avg_ts": 2.659762, + "stddev_ts": 5e-05, + "samples_ns": [ + 48123616323, + 48125399218, + 48124823234 + ], + "samples_ts": [ + 2.65982, + 2.65972, + 2.65975 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1028 + }, + { + "timestamp_utc": "2025-12-10T10:52:52.975907+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T10:41:16Z\",\n \"avg_ns\": 28285355368,\n \"stddev_ns\": 2539615,\n \"avg_ts\": 4.525310,\n \"stddev_ts\": 0.000406,\n \"samples_ns\": [ 28287520067, 28285986262, 28282559775 ],\n \"samples_ts\": [ 4.52496, 4.52521, 4.52576 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T10:43:09Z\",\n \"avg_ns\": 194374181917,\n \"stddev_ns\": 2026890,\n \"avg_ts\": 2.634095,\n \"stddev_ts\": 0.000027,\n \"samples_ns\": [ 194372687532, 194376489044, 194373369175 ],\n \"samples_ts\": [ 2.63411, 2.63406, 2.63411 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T10:41:16Z", + "avg_ns": 28285355368, + "stddev_ns": 2539615, + "avg_ts": 4.52531, + "stddev_ts": 0.000406, + "samples_ns": [ + 28287520067, + 28285986262, + 28282559775 + ], + "samples_ts": [ + 4.52496, + 4.52521, + 4.52576 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T10:43:09Z", + "avg_ns": 194374181917, + "stddev_ns": 2026890, + "avg_ts": 2.634095, + "stddev_ts": 2.7e-05, + "samples_ns": [ + 194372687532, + 194376489044, + 194373369175 + ], + "samples_ts": [ + 2.63411, + 2.63406, + 2.63411 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1029 + }, + { + "timestamp_utc": "2025-12-10T11:03:03.763592+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T10:52:54Z\",\n \"avg_ns\": 116666356459,\n \"stddev_ns\": 3664039,\n \"avg_ts\": 4.388583,\n \"stddev_ts\": 0.000138,\n \"samples_ns\": [ 116669740512, 116662465268, 116666863597 ],\n \"samples_ts\": [ 4.38846, 4.38873, 4.38856 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T11:00:41Z\",\n \"avg_ns\": 47398431503,\n \"stddev_ns\": 6920746,\n \"avg_ts\": 2.700511,\n \"stddev_ts\": 0.000394,\n \"samples_ns\": [ 47406239919, 47395967234, 47393087358 ],\n \"samples_ts\": [ 2.70007, 2.70065, 2.70082 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T10:52:54Z", + "avg_ns": 116666356459, + "stddev_ns": 3664039, + "avg_ts": 4.388583, + "stddev_ts": 0.000138, + "samples_ns": [ + 116669740512, + 116662465268, + 116666863597 + ], + "samples_ts": [ + 4.38846, + 4.38873, + 4.38856 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T11:00:41Z", + "avg_ns": 47398431503, + "stddev_ns": 6920746, + "avg_ts": 2.700511, + "stddev_ts": 0.000394, + "samples_ns": [ + 47406239919, + 47395967234, + 47393087358 + ], + "samples_ts": [ + 2.70007, + 2.70065, + 2.70082 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1030 + }, + { + "timestamp_utc": "2025-12-10T11:20:25.296650+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T11:03:04Z\",\n \"avg_ns\": 116378777847,\n \"stddev_ns\": 4442439,\n \"avg_ts\": 4.399428,\n \"stddev_ts\": 0.000167,\n \"samples_ns\": [ 116378805627, 116374334701, 116383193214 ],\n \"samples_ts\": [ 4.39943, 4.3996, 4.39926 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T11:10:50Z\",\n \"avg_ns\": 191457124205,\n \"stddev_ns\": 7818383,\n \"avg_ts\": 2.674228,\n \"stddev_ts\": 0.000109,\n \"samples_ns\": [ 191463857191, 191458929210, 191448586216 ],\n \"samples_ts\": [ 2.67413, 2.6742, 2.67435 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T11:03:04Z", + "avg_ns": 116378777847, + "stddev_ns": 4442439, + "avg_ts": 4.399428, + "stddev_ts": 0.000167, + "samples_ns": [ + 116378805627, + 116374334701, + 116383193214 + ], + "samples_ts": [ + 4.39943, + 4.3996, + 4.39926 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T11:10:50Z", + "avg_ns": 191457124205, + "stddev_ns": 7818383, + "avg_ts": 2.674228, + "stddev_ts": 0.000109, + "samples_ns": [ + 191463857191, + 191458929210, + 191448586216 + ], + "samples_ts": [ + 2.67413, + 2.6742, + 2.67435 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1031 + }, + { + "timestamp_utc": "2025-12-10T11:24:41.780316+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T11:20:26Z\",\n \"avg_ns\": 28287988518,\n \"stddev_ns\": 2073775,\n \"avg_ts\": 4.524889,\n \"stddev_ts\": 0.000331,\n \"samples_ns\": [ 28286532550, 28287078707, 28290354298 ],\n \"samples_ts\": [ 4.52512, 4.52503, 4.52451 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T11:22:19Z\",\n \"avg_ns\": 47246607016,\n \"stddev_ns\": 1980480,\n \"avg_ts\": 2.709189,\n \"stddev_ts\": 0.000112,\n \"samples_ns\": [ 47248850881, 47245257894, 47245712275 ],\n \"samples_ts\": [ 2.70906, 2.70927, 2.70924 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T11:20:26Z", + "avg_ns": 28287988518, + "stddev_ns": 2073775, + "avg_ts": 4.524889, + "stddev_ts": 0.000331, + "samples_ns": [ + 28286532550, + 28287078707, + 28290354298 + ], + "samples_ts": [ + 4.52512, + 4.52503, + 4.52451 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T11:22:19Z", + "avg_ns": 47246607016, + "stddev_ns": 1980480, + "avg_ts": 2.709189, + "stddev_ts": 0.000112, + "samples_ns": [ + 47248850881, + 47245257894, + 47245712275 + ], + "samples_ts": [ + 2.70906, + 2.70927, + 2.70924 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1032 + }, + { + "timestamp_utc": "2025-12-10T11:36:10.349612+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T11:24:42Z\",\n \"avg_ns\": 28283319145,\n \"stddev_ns\": 735242,\n \"avg_ts\": 4.525636,\n \"stddev_ts\": 0.000115,\n \"samples_ns\": [ 28284138350, 28282814799, 28283004287 ],\n \"samples_ts\": [ 4.5255, 4.52572, 4.52569 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T11:26:36Z\",\n \"avg_ns\": 191276328777,\n \"stddev_ns\": 5641674,\n \"avg_ts\": 2.676756,\n \"stddev_ts\": 0.000079,\n \"samples_ns\": [ 191280383264, 191278717340, 191269885727 ],\n \"samples_ts\": [ 2.6767, 2.67672, 2.67685 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T11:24:42Z", + "avg_ns": 28283319145, + "stddev_ns": 735242, + "avg_ts": 4.525636, + "stddev_ts": 0.000115, + "samples_ns": [ + 28284138350, + 28282814799, + 28283004287 + ], + "samples_ts": [ + 4.5255, + 4.52572, + 4.52569 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T11:26:36Z", + "avg_ns": 191276328777, + "stddev_ns": 5641674, + "avg_ts": 2.676756, + "stddev_ts": 7.9e-05, + "samples_ns": [ + 191280383264, + 191278717340, + 191269885727 + ], + "samples_ts": [ + 2.6767, + 2.67672, + 2.67685 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1033 + }, + { + "timestamp_utc": "2025-12-10T11:46:10.570300+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T11:36:11Z\",\n \"avg_ns\": 113920390899,\n \"stddev_ns\": 1975687,\n \"avg_ts\": 4.494367,\n \"stddev_ts\": 0.000078,\n \"samples_ns\": [ 113918409241, 113920402895, 113922360561 ],\n \"samples_ts\": [ 4.49444, 4.49437, 4.49429 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T11:43:47Z\",\n \"avg_ns\": 47635933837,\n \"stddev_ns\": 4185712,\n \"avg_ts\": 2.687047,\n \"stddev_ts\": 0.000235,\n \"samples_ns\": [ 47640577255, 47634731918, 47632492340 ],\n \"samples_ts\": [ 2.68679, 2.68711, 2.68724 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T11:36:11Z", + "avg_ns": 113920390899, + "stddev_ns": 1975687, + "avg_ts": 4.494367, + "stddev_ts": 7.8e-05, + "samples_ns": [ + 113918409241, + 113920402895, + 113922360561 + ], + "samples_ts": [ + 4.49444, + 4.49437, + 4.49429 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T11:43:47Z", + "avg_ns": 47635933837, + "stddev_ns": 4185712, + "avg_ts": 2.687047, + "stddev_ts": 0.000235, + "samples_ns": [ + 47640577255, + 47634731918, + 47632492340 + ], + "samples_ts": [ + 2.68679, + 2.68711, + 2.68724 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1034 + }, + { + "timestamp_utc": "2025-12-10T12:03:32.926992+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T11:46:11Z\",\n \"avg_ns\": 113856060609,\n \"stddev_ns\": 1731990,\n \"avg_ts\": 4.496906,\n \"stddev_ts\": 0.000066,\n \"samples_ns\": [ 113854475507, 113855911000, 113857795322 ],\n \"samples_ts\": [ 4.49697, 4.49691, 4.49684 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T11:53:47Z\",\n \"avg_ns\": 195105152625,\n \"stddev_ns\": 2275296,\n \"avg_ts\": 2.624226,\n \"stddev_ts\": 0.000029,\n \"samples_ns\": [ 195103392721, 195107602232, 195104462924 ],\n \"samples_ts\": [ 2.62425, 2.62419, 2.62424 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T11:46:11Z", + "avg_ns": 113856060609, + "stddev_ns": 1731990, + "avg_ts": 4.496906, + "stddev_ts": 6.6e-05, + "samples_ns": [ + 113854475507, + 113855911000, + 113857795322 + ], + "samples_ts": [ + 4.49697, + 4.49691, + 4.49684 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T11:53:47Z", + "avg_ns": 195105152625, + "stddev_ns": 2275296, + "avg_ts": 2.624226, + "stddev_ts": 2.9e-05, + "samples_ns": [ + 195103392721, + 195107602232, + 195104462924 + ], + "samples_ts": [ + 2.62425, + 2.62419, + 2.62424 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1035 + }, + { + "timestamp_utc": "2025-12-10T12:07:49.605769+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T12:03:34Z\",\n \"avg_ns\": 28281709732,\n \"stddev_ns\": 1122853,\n \"avg_ts\": 4.525893,\n \"stddev_ts\": 0.000180,\n \"samples_ns\": [ 28282956143, 28281395790, 28280777263 ],\n \"samples_ts\": [ 4.52569, 4.52594, 4.52604 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T12:05:27Z\",\n \"avg_ns\": 47317440243,\n \"stddev_ns\": 3463735,\n \"avg_ts\": 2.705134,\n \"stddev_ts\": 0.000198,\n \"samples_ns\": [ 47319651657, 47313456624, 47319212449 ],\n \"samples_ts\": [ 2.70501, 2.70536, 2.70503 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T12:03:34Z", + "avg_ns": 28281709732, + "stddev_ns": 1122853, + "avg_ts": 4.525893, + "stddev_ts": 0.00018, + "samples_ns": [ + 28282956143, + 28281395790, + 28280777263 + ], + "samples_ts": [ + 4.52569, + 4.52594, + 4.52604 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T12:05:27Z", + "avg_ns": 47317440243, + "stddev_ns": 3463735, + "avg_ts": 2.705134, + "stddev_ts": 0.000198, + "samples_ns": [ + 47319651657, + 47313456624, + 47319212449 + ], + "samples_ts": [ + 2.70501, + 2.70536, + 2.70503 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1036 + }, + { + "timestamp_utc": "2025-12-10T12:19:19.618345+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T12:07:50Z\",\n \"avg_ns\": 28294889738,\n \"stddev_ns\": 1003588,\n \"avg_ts\": 4.523785,\n \"stddev_ts\": 0.000158,\n \"samples_ns\": [ 28295811352, 28295013624, 28293844239 ],\n \"samples_ts\": [ 4.52364, 4.52377, 4.52395 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T12:09:43Z\",\n \"avg_ns\": 191720234560,\n \"stddev_ns\": 12902426,\n \"avg_ts\": 2.670558,\n \"stddev_ts\": 0.000180,\n \"samples_ns\": [ 191727228874, 191705345200, 191728129606 ],\n \"samples_ts\": [ 2.67046, 2.67077, 2.67045 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T12:07:50Z", + "avg_ns": 28294889738, + "stddev_ns": 1003588, + "avg_ts": 4.523785, + "stddev_ts": 0.000158, + "samples_ns": [ + 28295811352, + 28295013624, + 28293844239 + ], + "samples_ts": [ + 4.52364, + 4.52377, + 4.52395 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T12:09:43Z", + "avg_ns": 191720234560, + "stddev_ns": 12902426, + "avg_ts": 2.670558, + "stddev_ts": 0.00018, + "samples_ns": [ + 191727228874, + 191705345200, + 191728129606 + ], + "samples_ts": [ + 2.67046, + 2.67077, + 2.67045 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1037 + }, + { + "timestamp_utc": "2025-12-10T12:29:21.385306+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T12:19:20Z\",\n \"avg_ns\": 114303754945,\n \"stddev_ns\": 2886462,\n \"avg_ts\": 4.479293,\n \"stddev_ts\": 0.000113,\n \"samples_ns\": [ 114306558978, 114303913286, 114300792571 ],\n \"samples_ts\": [ 4.47918, 4.47929, 4.47941 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T12:26:57Z\",\n \"avg_ns\": 47641443797,\n \"stddev_ns\": 4124705,\n \"avg_ts\": 2.686736,\n \"stddev_ts\": 0.000233,\n \"samples_ns\": [ 47645986791, 47640410887, 47637933713 ],\n \"samples_ts\": [ 2.68648, 2.68679, 2.68693 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T12:19:20Z", + "avg_ns": 114303754945, + "stddev_ns": 2886462, + "avg_ts": 4.479293, + "stddev_ts": 0.000113, + "samples_ns": [ + 114306558978, + 114303913286, + 114300792571 + ], + "samples_ts": [ + 4.47918, + 4.47929, + 4.47941 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T12:26:57Z", + "avg_ns": 47641443797, + "stddev_ns": 4124705, + "avg_ts": 2.686736, + "stddev_ts": 0.000233, + "samples_ns": [ + 47645986791, + 47640410887, + 47637933713 + ], + "samples_ts": [ + 2.68648, + 2.68679, + 2.68693 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1038 + }, + { + "timestamp_utc": "2025-12-10T12:46:41.793867+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T12:29:22Z\",\n \"avg_ns\": 114371590642,\n \"stddev_ns\": 2649224,\n \"avg_ts\": 4.476636,\n \"stddev_ts\": 0.000103,\n \"samples_ns\": [ 114374571701, 114369611251, 114370588975 ],\n \"samples_ts\": [ 4.47652, 4.47671, 4.47668 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T12:37:00Z\",\n \"avg_ns\": 193753516122,\n \"stddev_ns\": 3032953,\n \"avg_ts\": 2.642533,\n \"stddev_ts\": 0.000041,\n \"samples_ns\": [ 193756783129, 193750790003, 193752975234 ],\n \"samples_ts\": [ 2.64249, 2.64257, 2.64254 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T12:29:22Z", + "avg_ns": 114371590642, + "stddev_ns": 2649224, + "avg_ts": 4.476636, + "stddev_ts": 0.000103, + "samples_ns": [ + 114374571701, + 114369611251, + 114370588975 + ], + "samples_ts": [ + 4.47652, + 4.47671, + 4.47668 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T12:37:00Z", + "avg_ns": 193753516122, + "stddev_ns": 3032953, + "avg_ts": 2.642533, + "stddev_ts": 4.1e-05, + "samples_ns": [ + 193756783129, + 193750790003, + 193752975234 + ], + "samples_ts": [ + 2.64249, + 2.64257, + 2.64254 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1039 + }, + { + "timestamp_utc": "2025-12-10T12:50:59.271454+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T12:46:42Z\",\n \"avg_ns\": 28290007775,\n \"stddev_ns\": 809400,\n \"avg_ts\": 4.524566,\n \"stddev_ts\": 0.000129,\n \"samples_ns\": [ 28289161152, 28290088228, 28290773945 ],\n \"samples_ts\": [ 4.5247, 4.52455, 4.52444 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T12:48:36Z\",\n \"avg_ns\": 47571194923,\n \"stddev_ns\": 3626578,\n \"avg_ts\": 2.690704,\n \"stddev_ts\": 0.000204,\n \"samples_ns\": [ 47572898389, 47567044618, 47573641764 ],\n \"samples_ts\": [ 2.69061, 2.69094, 2.69057 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T12:46:42Z", + "avg_ns": 28290007775, + "stddev_ns": 809400, + "avg_ts": 4.524566, + "stddev_ts": 0.000129, + "samples_ns": [ + 28289161152, + 28290088228, + 28290773945 + ], + "samples_ts": [ + 4.5247, + 4.52455, + 4.52444 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T12:48:36Z", + "avg_ns": 47571194923, + "stddev_ns": 3626578, + "avg_ts": 2.690704, + "stddev_ts": 0.000204, + "samples_ns": [ + 47572898389, + 47567044618, + 47573641764 + ], + "samples_ts": [ + 2.69061, + 2.69094, + 2.69057 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1040 + }, + { + "timestamp_utc": "2025-12-10T13:02:28.110042+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T12:51:00Z\",\n \"avg_ns\": 28289902729,\n \"stddev_ns\": 605638,\n \"avg_ts\": 4.524583,\n \"stddev_ts\": 0.000093,\n \"samples_ns\": [ 28290261292, 28290215467, 28289231429 ],\n \"samples_ts\": [ 4.52453, 4.52453, 4.52469 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T12:52:53Z\",\n \"avg_ns\": 191355000788,\n \"stddev_ns\": 5302140,\n \"avg_ts\": 2.675655,\n \"stddev_ts\": 0.000074,\n \"samples_ns\": [ 191358589644, 191348910690, 191357502030 ],\n \"samples_ts\": [ 2.67561, 2.67574, 2.67562 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T12:51:00Z", + "avg_ns": 28289902729, + "stddev_ns": 605638, + "avg_ts": 4.524583, + "stddev_ts": 9.3e-05, + "samples_ns": [ + 28290261292, + 28290215467, + 28289231429 + ], + "samples_ts": [ + 4.52453, + 4.52453, + 4.52469 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T12:52:53Z", + "avg_ns": 191355000788, + "stddev_ns": 5302140, + "avg_ts": 2.675655, + "stddev_ts": 7.4e-05, + "samples_ns": [ + 191358589644, + 191348910690, + 191357502030 + ], + "samples_ts": [ + 2.67561, + 2.67574, + 2.67562 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1041 + }, + { + "timestamp_utc": "2025-12-10T13:12:37.839489+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T13:02:29Z\",\n \"avg_ns\": 116659303551,\n \"stddev_ns\": 11314165,\n \"avg_ts\": 4.388848,\n \"stddev_ts\": 0.000426,\n \"samples_ns\": [ 116672348180, 116652157879, 116653404594 ],\n \"samples_ts\": [ 4.38836, 4.38912, 4.38907 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T13:10:15Z\",\n \"avg_ns\": 47138758733,\n \"stddev_ns\": 3667093,\n \"avg_ts\": 2.715388,\n \"stddev_ts\": 0.000211,\n \"samples_ns\": [ 47142131039, 47134876228, 47139268934 ],\n \"samples_ts\": [ 2.71519, 2.71561, 2.71536 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T13:02:29Z", + "avg_ns": 116659303551, + "stddev_ns": 11314165, + "avg_ts": 4.388848, + "stddev_ts": 0.000426, + "samples_ns": [ + 116672348180, + 116652157879, + 116653404594 + ], + "samples_ts": [ + 4.38836, + 4.38912, + 4.38907 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T13:10:15Z", + "avg_ns": 47138758733, + "stddev_ns": 3667093, + "avg_ts": 2.715388, + "stddev_ts": 0.000211, + "samples_ns": [ + 47142131039, + 47134876228, + 47139268934 + ], + "samples_ts": [ + 2.71519, + 2.71561, + 2.71536 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1042 + }, + { + "timestamp_utc": "2025-12-10T13:30:02.598675+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T13:12:38Z\",\n \"avg_ns\": 116702220876,\n \"stddev_ns\": 2140996,\n \"avg_ts\": 4.387234,\n \"stddev_ts\": 0.000078,\n \"samples_ns\": [ 116700452937, 116701688424, 116704521269 ],\n \"samples_ts\": [ 4.3873, 4.38725, 4.38715 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T13:20:25Z\",\n \"avg_ns\": 192091203951,\n \"stddev_ns\": 4126546,\n \"avg_ts\": 2.665401,\n \"stddev_ts\": 0.000057,\n \"samples_ns\": [ 192095953190, 192089163889, 192088494774 ],\n \"samples_ts\": [ 2.66533, 2.66543, 2.66544 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T13:12:38Z", + "avg_ns": 116702220876, + "stddev_ns": 2140996, + "avg_ts": 4.387234, + "stddev_ts": 7.8e-05, + "samples_ns": [ + 116700452937, + 116701688424, + 116704521269 + ], + "samples_ts": [ + 4.3873, + 4.38725, + 4.38715 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T13:20:25Z", + "avg_ns": 192091203951, + "stddev_ns": 4126546, + "avg_ts": 2.665401, + "stddev_ts": 5.7e-05, + "samples_ns": [ + 192095953190, + 192089163889, + 192088494774 + ], + "samples_ts": [ + 2.66533, + 2.66543, + 2.66544 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1043 + }, + { + "timestamp_utc": "2025-12-10T13:33:08.345544+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T13:30:03Z\",\n \"avg_ns\": 14293132065,\n \"stddev_ns\": 2578842,\n \"avg_ts\": 8.955350,\n \"stddev_ts\": 0.001612,\n \"samples_ns\": [ 14295693580, 14293155465, 14290547152 ],\n \"samples_ts\": [ 8.95375, 8.95534, 8.95697 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T13:31:00Z\",\n \"avg_ns\": 42335845241,\n \"stddev_ns\": 11314892,\n \"avg_ts\": 3.023443,\n \"stddev_ts\": 0.000808,\n \"samples_ns\": [ 42330644244, 42328065999, 42348825480 ],\n \"samples_ts\": [ 3.02381, 3.024, 3.02252 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T13:30:03Z", + "avg_ns": 14293132065, + "stddev_ns": 2578842, + "avg_ts": 8.95535, + "stddev_ts": 0.001612, + "samples_ns": [ + 14295693580, + 14293155465, + 14290547152 + ], + "samples_ts": [ + 8.95375, + 8.95534, + 8.95697 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T13:31:00Z", + "avg_ns": 42335845241, + "stddev_ns": 11314892, + "avg_ts": 3.023443, + "stddev_ts": 0.000808, + "samples_ns": [ + 42330644244, + 42328065999, + 42348825480 + ], + "samples_ts": [ + 3.02381, + 3.024, + 3.02252 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1044 + }, + { + "timestamp_utc": "2025-12-10T13:42:38.703945+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T13:33:09Z\",\n \"avg_ns\": 14290712627,\n \"stddev_ns\": 441507,\n \"avg_ts\": 8.956866,\n \"stddev_ts\": 0.000256,\n \"samples_ns\": [ 14291168801, 14290585914, 14290383168 ],\n \"samples_ts\": [ 8.95658, 8.95695, 8.95707 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T13:34:06Z\",\n \"avg_ns\": 170541362859,\n \"stddev_ns\": 63814690,\n \"avg_ts\": 3.002205,\n \"stddev_ts\": 0.001124,\n \"samples_ns\": [ 170471640459, 170555578615, 170596869504 ],\n \"samples_ts\": [ 3.00343, 3.00195, 3.00123 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T13:33:09Z", + "avg_ns": 14290712627, + "stddev_ns": 441507, + "avg_ts": 8.956866, + "stddev_ts": 0.000256, + "samples_ns": [ + 14291168801, + 14290585914, + 14290383168 + ], + "samples_ts": [ + 8.95658, + 8.95695, + 8.95707 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T13:34:06Z", + "avg_ns": 170541362859, + "stddev_ns": 63814690, + "avg_ts": 3.002205, + "stddev_ts": 0.001124, + "samples_ns": [ + 170471640459, + 170555578615, + 170596869504 + ], + "samples_ts": [ + 3.00343, + 3.00195, + 3.00123 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1045 + }, + { + "timestamp_utc": "2025-12-10T13:48:36.565805+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T13:42:39Z\",\n \"avg_ns\": 57424236161,\n \"stddev_ns\": 3742248,\n \"avg_ts\": 8.916096,\n \"stddev_ts\": 0.000581,\n \"samples_ns\": [ 57421475717, 57422737254, 57428495512 ],\n \"samples_ts\": [ 8.91652, 8.91633, 8.91543 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T13:46:29Z\",\n \"avg_ns\": 42199267377,\n \"stddev_ns\": 15388372,\n \"avg_ts\": 3.033228,\n \"stddev_ts\": 0.001106,\n \"samples_ns\": [ 42213964195, 42183274880, 42200563058 ],\n \"samples_ts\": [ 3.03217, 3.03438, 3.03313 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T13:42:39Z", + "avg_ns": 57424236161, + "stddev_ns": 3742248, + "avg_ts": 8.916096, + "stddev_ts": 0.000581, + "samples_ns": [ + 57421475717, + 57422737254, + 57428495512 + ], + "samples_ts": [ + 8.91652, + 8.91633, + 8.91543 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T13:46:29Z", + "avg_ns": 42199267377, + "stddev_ns": 15388372, + "avg_ts": 3.033228, + "stddev_ts": 0.001106, + "samples_ns": [ + 42213964195, + 42183274880, + 42200563058 + ], + "samples_ts": [ + 3.03217, + 3.03438, + 3.03313 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1046 + }, + { + "timestamp_utc": "2025-12-10T14:00:58.506304+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T13:48:37Z\",\n \"avg_ns\": 57453057470,\n \"stddev_ns\": 2192650,\n \"avg_ts\": 8.911623,\n \"stddev_ts\": 0.000340,\n \"samples_ns\": [ 57450526883, 57454253389, 57454392138 ],\n \"samples_ts\": [ 8.91202, 8.91144, 8.91142 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T13:52:27Z\",\n \"avg_ns\": 170181641229,\n \"stddev_ns\": 50821606,\n \"avg_ts\": 3.008550,\n \"stddev_ts\": 0.000898,\n \"samples_ns\": [ 170138424353, 170168868540, 170237630794 ],\n \"samples_ts\": [ 3.00931, 3.00878, 3.00756 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T13:48:37Z", + "avg_ns": 57453057470, + "stddev_ns": 2192650, + "avg_ts": 8.911623, + "stddev_ts": 0.00034, + "samples_ns": [ + 57450526883, + 57454253389, + 57454392138 + ], + "samples_ts": [ + 8.91202, + 8.91144, + 8.91142 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T13:52:27Z", + "avg_ns": 170181641229, + "stddev_ns": 50821606, + "avg_ts": 3.00855, + "stddev_ts": 0.000898, + "samples_ns": [ + 170138424353, + 170168868540, + 170237630794 + ], + "samples_ts": [ + 3.00931, + 3.00878, + 3.00756 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1047 + }, + { + "timestamp_utc": "2025-12-10T14:04:04.380726+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T14:00:59Z\",\n \"avg_ns\": 14290114637,\n \"stddev_ns\": 395960,\n \"avg_ts\": 8.957241,\n \"stddev_ts\": 0.000224,\n \"samples_ns\": [ 14289701614, 14290304924, 14290337375 ],\n \"samples_ts\": [ 8.9575, 8.95712, 8.9571 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T14:01:56Z\",\n \"avg_ns\": 42384680447,\n \"stddev_ns\": 16631510,\n \"avg_ts\": 3.019959,\n \"stddev_ts\": 0.001185,\n \"samples_ns\": [ 42403782351, 42376820745, 42373438247 ],\n \"samples_ts\": [ 3.0186, 3.02052, 3.02076 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T14:00:59Z", + "avg_ns": 14290114637, + "stddev_ns": 395960, + "avg_ts": 8.957241, + "stddev_ts": 0.000224, + "samples_ns": [ + 14289701614, + 14290304924, + 14290337375 + ], + "samples_ts": [ + 8.9575, + 8.95712, + 8.9571 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T14:01:56Z", + "avg_ns": 42384680447, + "stddev_ns": 16631510, + "avg_ts": 3.019959, + "stddev_ts": 0.001185, + "samples_ns": [ + 42403782351, + 42376820745, + 42373438247 + ], + "samples_ts": [ + 3.0186, + 3.02052, + 3.02076 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1048 + }, + { + "timestamp_utc": "2025-12-10T14:13:33.441650+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T14:04:05Z\",\n \"avg_ns\": 14286777339,\n \"stddev_ns\": 706216,\n \"avg_ts\": 8.959333,\n \"stddev_ts\": 0.000430,\n \"samples_ns\": [ 14286325590, 14287566340, 14286440089 ],\n \"samples_ts\": [ 8.95962, 8.95884, 8.95954 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T14:05:02Z\",\n \"avg_ns\": 170103197373,\n \"stddev_ns\": 30004587,\n \"avg_ts\": 3.009938,\n \"stddev_ts\": 0.000531,\n \"samples_ns\": [ 170118810385, 170122176136, 170068605598 ],\n \"samples_ts\": [ 3.00966, 3.0096, 3.01055 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T14:04:05Z", + "avg_ns": 14286777339, + "stddev_ns": 706216, + "avg_ts": 8.959333, + "stddev_ts": 0.00043, + "samples_ns": [ + 14286325590, + 14287566340, + 14286440089 + ], + "samples_ts": [ + 8.95962, + 8.95884, + 8.95954 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T14:05:02Z", + "avg_ns": 170103197373, + "stddev_ns": 30004587, + "avg_ts": 3.009938, + "stddev_ts": 0.000531, + "samples_ns": [ + 170118810385, + 170122176136, + 170068605598 + ], + "samples_ts": [ + 3.00966, + 3.0096, + 3.01055 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1049 + }, + { + "timestamp_utc": "2025-12-10T14:19:33.435193+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T14:13:34Z\",\n \"avg_ns\": 57978510336,\n \"stddev_ns\": 4555214,\n \"avg_ts\": 8.830858,\n \"stddev_ts\": 0.000692,\n \"samples_ns\": [ 57973285352, 57981521172, 57980724486 ],\n \"samples_ts\": [ 8.83165, 8.8304, 8.83052 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T14:17:26Z\",\n \"avg_ns\": 42159732727,\n \"stddev_ns\": 6236860,\n \"avg_ts\": 3.036072,\n \"stddev_ts\": 0.000449,\n \"samples_ns\": [ 42164761961, 42152753972, 42161682248 ],\n \"samples_ts\": [ 3.03571, 3.03658, 3.03593 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T14:13:34Z", + "avg_ns": 57978510336, + "stddev_ns": 4555214, + "avg_ts": 8.830858, + "stddev_ts": 0.000692, + "samples_ns": [ + 57973285352, + 57981521172, + 57980724486 + ], + "samples_ts": [ + 8.83165, + 8.8304, + 8.83052 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T14:17:26Z", + "avg_ns": 42159732727, + "stddev_ns": 6236860, + "avg_ts": 3.036072, + "stddev_ts": 0.000449, + "samples_ns": [ + 42164761961, + 42152753972, + 42161682248 + ], + "samples_ts": [ + 3.03571, + 3.03658, + 3.03593 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1050 + }, + { + "timestamp_utc": "2025-12-10T14:31:59.917709+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T14:19:34Z\",\n \"avg_ns\": 58066588845,\n \"stddev_ns\": 3556841,\n \"avg_ts\": 8.817463,\n \"stddev_ts\": 0.000539,\n \"samples_ns\": [ 58068615240, 58068660020, 58062491276 ],\n \"samples_ts\": [ 8.81716, 8.81715, 8.81809 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T14:23:26Z\",\n \"avg_ns\": 170878071138,\n \"stddev_ns\": 33936870,\n \"avg_ts\": 2.996289,\n \"stddev_ts\": 0.000595,\n \"samples_ns\": [ 170913353082, 170875198358, 170845661974 ],\n \"samples_ts\": [ 2.99567, 2.99634, 2.99686 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T14:19:34Z", + "avg_ns": 58066588845, + "stddev_ns": 3556841, + "avg_ts": 8.817463, + "stddev_ts": 0.000539, + "samples_ns": [ + 58068615240, + 58068660020, + 58062491276 + ], + "samples_ts": [ + 8.81716, + 8.81715, + 8.81809 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T14:23:26Z", + "avg_ns": 170878071138, + "stddev_ns": 33936870, + "avg_ts": 2.996289, + "stddev_ts": 0.000595, + "samples_ns": [ + 170913353082, + 170875198358, + 170845661974 + ], + "samples_ts": [ + 2.99567, + 2.99634, + 2.99686 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1051 + }, + { + "timestamp_utc": "2025-12-10T14:35:05.301058+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T14:32:01Z\",\n \"avg_ns\": 14298360591,\n \"stddev_ns\": 4822309,\n \"avg_ts\": 8.952076,\n \"stddev_ts\": 0.003018,\n \"samples_ns\": [ 14297117985, 14294282712, 14303681077 ],\n \"samples_ts\": [ 8.95285, 8.95463, 8.94875 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T14:32:58Z\",\n \"avg_ns\": 42195419963,\n \"stddev_ns\": 8902772,\n \"avg_ts\": 3.033505,\n \"stddev_ts\": 0.000640,\n \"samples_ns\": [ 42185406851, 42198421438, 42202431601 ],\n \"samples_ts\": [ 3.03422, 3.03329, 3.033 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T14:32:01Z", + "avg_ns": 14298360591, + "stddev_ns": 4822309, + "avg_ts": 8.952076, + "stddev_ts": 0.003018, + "samples_ns": [ + 14297117985, + 14294282712, + 14303681077 + ], + "samples_ts": [ + 8.95285, + 8.95463, + 8.94875 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T14:32:58Z", + "avg_ns": 42195419963, + "stddev_ns": 8902772, + "avg_ts": 3.033505, + "stddev_ts": 0.00064, + "samples_ns": [ + 42185406851, + 42198421438, + 42202431601 + ], + "samples_ts": [ + 3.03422, + 3.03329, + 3.033 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1052 + }, + { + "timestamp_utc": "2025-12-10T14:44:35.298883+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T14:35:06Z\",\n \"avg_ns\": 14285441583,\n \"stddev_ns\": 337231,\n \"avg_ts\": 8.960171,\n \"stddev_ts\": 0.000183,\n \"samples_ns\": [ 14285292387, 14285254534, 14285777830 ],\n \"samples_ts\": [ 8.96026, 8.96029, 8.95996 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T14:36:03Z\",\n \"avg_ns\": 170427333686,\n \"stddev_ns\": 23656075,\n \"avg_ts\": 3.004213,\n \"stddev_ts\": 0.000417,\n \"samples_ns\": [ 170403724544, 170427254432, 170451022084 ],\n \"samples_ts\": [ 3.00463, 3.00421, 3.0038 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T14:35:06Z", + "avg_ns": 14285441583, + "stddev_ns": 337231, + "avg_ts": 8.960171, + "stddev_ts": 0.000183, + "samples_ns": [ + 14285292387, + 14285254534, + 14285777830 + ], + "samples_ts": [ + 8.96026, + 8.96029, + 8.95996 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T14:36:03Z", + "avg_ns": 170427333686, + "stddev_ns": 23656075, + "avg_ts": 3.004213, + "stddev_ts": 0.000417, + "samples_ns": [ + 170403724544, + 170427254432, + 170451022084 + ], + "samples_ts": [ + 3.00463, + 3.00421, + 3.0038 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1053 + }, + { + "timestamp_utc": "2025-12-10T14:50:42.354065+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T14:44:36Z\",\n \"avg_ns\": 59346523041,\n \"stddev_ns\": 489658285,\n \"avg_ts\": 8.627685,\n \"stddev_ts\": 0.070848,\n \"samples_ns\": [ 59911887260, 59057704808, 59069977056 ],\n \"samples_ts\": [ 8.54588, 8.66949, 8.66769 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T14:48:34Z\",\n \"avg_ns\": 42447978977,\n \"stddev_ns\": 16464309,\n \"avg_ts\": 3.015456,\n \"stddev_ts\": 0.001169,\n \"samples_ns\": [ 42466946497, 42439573872, 42437416564 ],\n \"samples_ts\": [ 3.01411, 3.01605, 3.01621 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T14:44:36Z", + "avg_ns": 59346523041, + "stddev_ns": 489658285, + "avg_ts": 8.627685, + "stddev_ts": 0.070848, + "samples_ns": [ + 59911887260, + 59057704808, + 59069977056 + ], + "samples_ts": [ + 8.54588, + 8.66949, + 8.66769 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T14:48:34Z", + "avg_ns": 42447978977, + "stddev_ns": 16464309, + "avg_ts": 3.015456, + "stddev_ts": 0.001169, + "samples_ns": [ + 42466946497, + 42439573872, + 42437416564 + ], + "samples_ts": [ + 3.01411, + 3.01605, + 3.01621 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1054 + }, + { + "timestamp_utc": "2025-12-10T15:03:12.939392+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T14:50:43Z\",\n \"avg_ns\": 59013447480,\n \"stddev_ns\": 6945251,\n \"avg_ts\": 8.675989,\n \"stddev_ts\": 0.001020,\n \"samples_ns\": [ 59019841815, 59014427976, 59006072651 ],\n \"samples_ts\": [ 8.67505, 8.67584, 8.67707 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T14:54:39Z\",\n \"avg_ns\": 170974332755,\n \"stddev_ns\": 27239555,\n \"avg_ts\": 2.994602,\n \"stddev_ts\": 0.000477,\n \"samples_ns\": [ 170998882886, 170945039472, 170979075909 ],\n \"samples_ts\": [ 2.99417, 2.99511, 2.99452 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T14:50:43Z", + "avg_ns": 59013447480, + "stddev_ns": 6945251, + "avg_ts": 8.675989, + "stddev_ts": 0.00102, + "samples_ns": [ + 59019841815, + 59014427976, + 59006072651 + ], + "samples_ts": [ + 8.67505, + 8.67584, + 8.67707 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T14:54:39Z", + "avg_ns": 170974332755, + "stddev_ns": 27239555, + "avg_ts": 2.994602, + "stddev_ts": 0.000477, + "samples_ns": [ + 170998882886, + 170945039472, + 170979075909 + ], + "samples_ts": [ + 2.99417, + 2.99511, + 2.99452 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1055 + }, + { + "timestamp_utc": "2025-12-10T15:06:19.296960+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T15:03:14Z\",\n \"avg_ns\": 14401794779,\n \"stddev_ns\": 182538467,\n \"avg_ts\": 8.888726,\n \"stddev_ts\": 0.111844,\n \"samples_ns\": [ 14612572044, 14296430757, 14296381536 ],\n \"samples_ts\": [ 8.75958, 8.95328, 8.95331 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T15:04:11Z\",\n \"avg_ns\": 42322736665,\n \"stddev_ns\": 5632847,\n \"avg_ts\": 3.024379,\n \"stddev_ts\": 0.000402,\n \"samples_ns\": [ 42316282354, 42325330795, 42326596848 ],\n \"samples_ts\": [ 3.02484, 3.02419, 3.0241 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T15:03:14Z", + "avg_ns": 14401794779, + "stddev_ns": 182538467, + "avg_ts": 8.888726, + "stddev_ts": 0.111844, + "samples_ns": [ + 14612572044, + 14296430757, + 14296381536 + ], + "samples_ts": [ + 8.75958, + 8.95328, + 8.95331 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T15:04:11Z", + "avg_ns": 42322736665, + "stddev_ns": 5632847, + "avg_ts": 3.024379, + "stddev_ts": 0.000402, + "samples_ns": [ + 42316282354, + 42325330795, + 42326596848 + ], + "samples_ts": [ + 3.02484, + 3.02419, + 3.0241 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1056 + }, + { + "timestamp_utc": "2025-12-10T15:15:50.111361+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T15:06:20Z\",\n \"avg_ns\": 14298499482,\n \"stddev_ns\": 1735507,\n \"avg_ts\": 8.951988,\n \"stddev_ts\": 0.001084,\n \"samples_ns\": [ 14297403150, 14300495484, 14297599813 ],\n \"samples_ts\": [ 8.95267, 8.95074, 8.95255 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T15:07:17Z\",\n \"avg_ns\": 170680121804,\n \"stddev_ns\": 42224928,\n \"avg_ts\": 2.999764,\n \"stddev_ts\": 0.000742,\n \"samples_ns\": [ 170644664621, 170726830821, 170668869971 ],\n \"samples_ts\": [ 3.00039, 2.99894, 2.99996 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T15:06:20Z", + "avg_ns": 14298499482, + "stddev_ns": 1735507, + "avg_ts": 8.951988, + "stddev_ts": 0.001084, + "samples_ns": [ + 14297403150, + 14300495484, + 14297599813 + ], + "samples_ts": [ + 8.95267, + 8.95074, + 8.95255 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T15:07:17Z", + "avg_ns": 170680121804, + "stddev_ns": 42224928, + "avg_ts": 2.999764, + "stddev_ts": 0.000742, + "samples_ns": [ + 170644664621, + 170726830821, + 170668869971 + ], + "samples_ts": [ + 3.00039, + 2.99894, + 2.99996 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1057 + }, + { + "timestamp_utc": "2025-12-10T15:21:48.865654+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T15:15:51Z\",\n \"avg_ns\": 57427236807,\n \"stddev_ns\": 1237369,\n \"avg_ts\": 8.915630,\n \"stddev_ts\": 0.000188,\n \"samples_ns\": [ 57426099964, 57428515403, 57427095055 ],\n \"samples_ts\": [ 8.91581, 8.91543, 8.91565 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T15:19:40Z\",\n \"avg_ns\": 42487768542,\n \"stddev_ns\": 2828391,\n \"avg_ts\": 3.012632,\n \"stddev_ts\": 0.000199,\n \"samples_ns\": [ 42490415196, 42488076547, 42484813885 ],\n \"samples_ts\": [ 3.01244, 3.01261, 3.01284 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T15:15:51Z", + "avg_ns": 57427236807, + "stddev_ns": 1237369, + "avg_ts": 8.91563, + "stddev_ts": 0.000188, + "samples_ns": [ + 57426099964, + 57428515403, + 57427095055 + ], + "samples_ts": [ + 8.91581, + 8.91543, + 8.91565 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T15:19:40Z", + "avg_ns": 42487768542, + "stddev_ns": 2828391, + "avg_ts": 3.012632, + "stddev_ts": 0.000199, + "samples_ns": [ + 42490415196, + 42488076547, + 42484813885 + ], + "samples_ts": [ + 3.01244, + 3.01261, + 3.01284 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1058 + }, + { + "timestamp_utc": "2025-12-10T15:34:12.046003+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T15:21:49Z\",\n \"avg_ns\": 57430616816,\n \"stddev_ns\": 1139641,\n \"avg_ts\": 8.915105,\n \"stddev_ts\": 0.000177,\n \"samples_ns\": [ 57430740036, 57431689840, 57429420572 ],\n \"samples_ts\": [ 8.91509, 8.91494, 8.91529 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T15:25:39Z\",\n \"avg_ns\": 170619465652,\n \"stddev_ns\": 81141085,\n \"avg_ts\": 3.000830,\n \"stddev_ts\": 0.001427,\n \"samples_ns\": [ 170631264341, 170694060397, 170533072219 ],\n \"samples_ts\": [ 3.00062, 2.99952, 3.00235 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T15:21:49Z", + "avg_ns": 57430616816, + "stddev_ns": 1139641, + "avg_ts": 8.915105, + "stddev_ts": 0.000177, + "samples_ns": [ + 57430740036, + 57431689840, + 57429420572 + ], + "samples_ts": [ + 8.91509, + 8.91494, + 8.91529 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T15:25:39Z", + "avg_ns": 170619465652, + "stddev_ns": 81141085, + "avg_ts": 3.00083, + "stddev_ts": 0.001427, + "samples_ns": [ + 170631264341, + 170694060397, + 170533072219 + ], + "samples_ts": [ + 3.00062, + 2.99952, + 3.00235 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1059 + }, + { + "timestamp_utc": "2025-12-10T15:37:18.145237+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T15:34:13Z\",\n \"avg_ns\": 14289724170,\n \"stddev_ns\": 963131,\n \"avg_ts\": 8.957486,\n \"stddev_ts\": 0.000594,\n \"samples_ns\": [ 14290816448, 14289243145, 14289112919 ],\n \"samples_ts\": [ 8.9568, 8.95779, 8.95787 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T15:35:10Z\",\n \"avg_ns\": 42457986359,\n \"stddev_ns\": 30121234,\n \"avg_ts\": 3.014746,\n \"stddev_ts\": 0.002140,\n \"samples_ns\": [ 42473224809, 42477443526, 42423290742 ],\n \"samples_ts\": [ 3.01366, 3.01336, 3.01721 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T15:34:13Z", + "avg_ns": 14289724170, + "stddev_ns": 963131, + "avg_ts": 8.957486, + "stddev_ts": 0.000594, + "samples_ns": [ + 14290816448, + 14289243145, + 14289112919 + ], + "samples_ts": [ + 8.9568, + 8.95779, + 8.95787 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T15:35:10Z", + "avg_ns": 42457986359, + "stddev_ns": 30121234, + "avg_ts": 3.014746, + "stddev_ts": 0.00214, + "samples_ns": [ + 42473224809, + 42477443526, + 42423290742 + ], + "samples_ts": [ + 3.01366, + 3.01336, + 3.01721 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1060 + }, + { + "timestamp_utc": "2025-12-10T15:46:48.907595+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T15:37:19Z\",\n \"avg_ns\": 14292681526,\n \"stddev_ns\": 746372,\n \"avg_ts\": 8.955632,\n \"stddev_ts\": 0.000462,\n \"samples_ns\": [ 14293075829, 14291831556, 14293137194 ],\n \"samples_ts\": [ 8.95539, 8.95616, 8.95535 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T15:38:16Z\",\n \"avg_ns\": 170673149242,\n \"stddev_ns\": 39477355,\n \"avg_ts\": 2.999886,\n \"stddev_ts\": 0.000694,\n \"samples_ns\": [ 170715963019, 170638195164, 170665289544 ],\n \"samples_ts\": [ 2.99913, 3.0005, 3.00002 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T15:37:19Z", + "avg_ns": 14292681526, + "stddev_ns": 746372, + "avg_ts": 8.955632, + "stddev_ts": 0.000462, + "samples_ns": [ + 14293075829, + 14291831556, + 14293137194 + ], + "samples_ts": [ + 8.95539, + 8.95616, + 8.95535 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T15:38:16Z", + "avg_ns": 170673149242, + "stddev_ns": 39477355, + "avg_ts": 2.999886, + "stddev_ts": 0.000694, + "samples_ns": [ + 170715963019, + 170638195164, + 170665289544 + ], + "samples_ts": [ + 2.99913, + 3.0005, + 3.00002 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1061 + }, + { + "timestamp_utc": "2025-12-10T15:52:49.869727+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T15:46:50Z\",\n \"avg_ns\": 58022684191,\n \"stddev_ns\": 537624,\n \"avg_ts\": 8.824135,\n \"stddev_ts\": 0.000082,\n \"samples_ns\": [ 58022798661, 58023155362, 58022098550 ],\n \"samples_ts\": [ 8.82412, 8.82406, 8.82422 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T15:50:42Z\",\n \"avg_ns\": 42434788665,\n \"stddev_ns\": 11019045,\n \"avg_ts\": 3.016393,\n \"stddev_ts\": 0.000783,\n \"samples_ns\": [ 42437753454, 42444018041, 42422594502 ],\n \"samples_ts\": [ 3.01618, 3.01574, 3.01726 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T15:46:50Z", + "avg_ns": 58022684191, + "stddev_ns": 537624, + "avg_ts": 8.824135, + "stddev_ts": 8.2e-05, + "samples_ns": [ + 58022798661, + 58023155362, + 58022098550 + ], + "samples_ts": [ + 8.82412, + 8.82406, + 8.82422 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T15:50:42Z", + "avg_ns": 42434788665, + "stddev_ns": 11019045, + "avg_ts": 3.016393, + "stddev_ts": 0.000783, + "samples_ns": [ + 42437753454, + 42444018041, + 42422594502 + ], + "samples_ts": [ + 3.01618, + 3.01574, + 3.01726 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1062 + }, + { + "timestamp_utc": "2025-12-10T16:05:16.020967+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T15:52:50Z\",\n \"avg_ns\": 57997642374,\n \"stddev_ns\": 2282930,\n \"avg_ts\": 8.827945,\n \"stddev_ts\": 0.000346,\n \"samples_ns\": [ 57999212480, 57995039397, 57998675246 ],\n \"samples_ts\": [ 8.82771, 8.82834, 8.82779 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T15:56:43Z\",\n \"avg_ns\": 170828361929,\n \"stddev_ns\": 100059644,\n \"avg_ts\": 2.997161,\n \"stddev_ts\": 0.001756,\n \"samples_ns\": [ 170725930532, 170833289203, 170925866053 ],\n \"samples_ts\": [ 2.99896, 2.99707, 2.99545 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T15:52:50Z", + "avg_ns": 57997642374, + "stddev_ns": 2282930, + "avg_ts": 8.827945, + "stddev_ts": 0.000346, + "samples_ns": [ + 57999212480, + 57995039397, + 57998675246 + ], + "samples_ts": [ + 8.82771, + 8.82834, + 8.82779 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T15:56:43Z", + "avg_ns": 170828361929, + "stddev_ns": 100059644, + "avg_ts": 2.997161, + "stddev_ts": 0.001756, + "samples_ns": [ + 170725930532, + 170833289203, + 170925866053 + ], + "samples_ts": [ + 2.99896, + 2.99707, + 2.99545 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1063 + }, + { + "timestamp_utc": "2025-12-10T16:08:21.685997+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T16:05:17Z\",\n \"avg_ns\": 14295321506,\n \"stddev_ns\": 1255132,\n \"avg_ts\": 8.953978,\n \"stddev_ts\": 0.000779,\n \"samples_ns\": [ 14294968948, 14294292156, 14296703416 ],\n \"samples_ts\": [ 8.9542, 8.95462, 8.95311 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T16:06:14Z\",\n \"avg_ns\": 42303048163,\n \"stddev_ns\": 16477744,\n \"avg_ts\": 3.025787,\n \"stddev_ts\": 0.001179,\n \"samples_ns\": [ 42312854975, 42312262169, 42284027347 ],\n \"samples_ts\": [ 3.02509, 3.02513, 3.02715 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T16:05:17Z", + "avg_ns": 14295321506, + "stddev_ns": 1255132, + "avg_ts": 8.953978, + "stddev_ts": 0.000779, + "samples_ns": [ + 14294968948, + 14294292156, + 14296703416 + ], + "samples_ts": [ + 8.9542, + 8.95462, + 8.95311 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T16:06:14Z", + "avg_ns": 42303048163, + "stddev_ns": 16477744, + "avg_ts": 3.025787, + "stddev_ts": 0.001179, + "samples_ns": [ + 42312854975, + 42312262169, + 42284027347 + ], + "samples_ts": [ + 3.02509, + 3.02513, + 3.02715 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1064 + }, + { + "timestamp_utc": "2025-12-10T16:17:53.433313+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T16:08:22Z\",\n \"avg_ns\": 14448547835,\n \"stddev_ns\": 164894183,\n \"avg_ts\": 8.859789,\n \"stddev_ts\": 0.100893,\n \"samples_ns\": [ 14623281253, 14426688773, 14295673480 ],\n \"samples_ts\": [ 8.75317, 8.87244, 8.95376 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T16:09:20Z\",\n \"avg_ns\": 170734659215,\n \"stddev_ns\": 74835148,\n \"avg_ts\": 2.998806,\n \"stddev_ts\": 0.001315,\n \"samples_ns\": [ 170649321893, 170789080070, 170765575684 ],\n \"samples_ts\": [ 3.0003, 2.99785, 2.99826 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T16:08:22Z", + "avg_ns": 14448547835, + "stddev_ns": 164894183, + "avg_ts": 8.859789, + "stddev_ts": 0.100893, + "samples_ns": [ + 14623281253, + 14426688773, + 14295673480 + ], + "samples_ts": [ + 8.75317, + 8.87244, + 8.95376 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T16:09:20Z", + "avg_ns": 170734659215, + "stddev_ns": 74835148, + "avg_ts": 2.998806, + "stddev_ts": 0.001315, + "samples_ns": [ + 170649321893, + 170789080070, + 170765575684 + ], + "samples_ts": [ + 3.0003, + 2.99785, + 2.99826 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1065 + }, + { + "timestamp_utc": "2025-12-10T16:23:58.111069+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T16:17:54Z\",\n \"avg_ns\": 59007286229,\n \"stddev_ns\": 2112905,\n \"avg_ts\": 8.676895,\n \"stddev_ts\": 0.000307,\n \"samples_ns\": [ 59005576493, 59009608734, 59006673462 ],\n \"samples_ts\": [ 8.67715, 8.67655, 8.67698 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T16:21:50Z\",\n \"avg_ns\": 42346883281,\n \"stddev_ns\": 6561150,\n \"avg_ts\": 3.022655,\n \"stddev_ts\": 0.000468,\n \"samples_ns\": [ 42339314575, 42350669948, 42350665322 ],\n \"samples_ts\": [ 3.02319, 3.02238, 3.02238 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T16:17:54Z", + "avg_ns": 59007286229, + "stddev_ns": 2112905, + "avg_ts": 8.676895, + "stddev_ts": 0.000307, + "samples_ns": [ + 59005576493, + 59009608734, + 59006673462 + ], + "samples_ts": [ + 8.67715, + 8.67655, + 8.67698 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T16:21:50Z", + "avg_ns": 42346883281, + "stddev_ns": 6561150, + "avg_ts": 3.022655, + "stddev_ts": 0.000468, + "samples_ns": [ + 42339314575, + 42350669948, + 42350665322 + ], + "samples_ts": [ + 3.02319, + 3.02238, + 3.02238 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1066 + }, + { + "timestamp_utc": "2025-12-10T16:36:28.951062+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T16:23:59Z\",\n \"avg_ns\": 59012594186,\n \"stddev_ns\": 4594830,\n \"avg_ts\": 8.676114,\n \"stddev_ts\": 0.000675,\n \"samples_ns\": [ 59017689972, 59011302551, 59008790036 ],\n \"samples_ts\": [ 8.67536, 8.6763, 8.67667 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T16:27:55Z\",\n \"avg_ns\": 171064778498,\n \"stddev_ns\": 48350843,\n \"avg_ts\": 2.993018,\n \"stddev_ts\": 0.000846,\n \"samples_ns\": [ 171115758079, 171019579705, 171058997711 ],\n \"samples_ts\": [ 2.99213, 2.99381, 2.99312 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T16:23:59Z", + "avg_ns": 59012594186, + "stddev_ns": 4594830, + "avg_ts": 8.676114, + "stddev_ts": 0.000675, + "samples_ns": [ + 59017689972, + 59011302551, + 59008790036 + ], + "samples_ts": [ + 8.67536, + 8.6763, + 8.67667 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T16:27:55Z", + "avg_ns": 171064778498, + "stddev_ns": 48350843, + "avg_ts": 2.993018, + "stddev_ts": 0.000846, + "samples_ns": [ + 171115758079, + 171019579705, + 171058997711 + ], + "samples_ts": [ + 2.99213, + 2.99381, + 2.99312 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1067 + }, + { + "timestamp_utc": "2025-12-10T16:39:35.646240+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T16:36:30Z\",\n \"avg_ns\": 14341286401,\n \"stddev_ns\": 91814848,\n \"avg_ts\": 8.925523,\n \"stddev_ts\": 0.056932,\n \"samples_ns\": [ 14447295052, 14287020810, 14289543341 ],\n \"samples_ts\": [ 8.85979, 8.95918, 8.9576 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T16:37:27Z\",\n \"avg_ns\": 42485054827,\n \"stddev_ns\": 16486098,\n \"avg_ts\": 3.012824,\n \"stddev_ts\": 0.001169,\n \"samples_ns\": [ 42499230583, 42466963431, 42488970467 ],\n \"samples_ts\": [ 3.01182, 3.01411, 3.01255 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T16:36:30Z", + "avg_ns": 14341286401, + "stddev_ns": 91814848, + "avg_ts": 8.925523, + "stddev_ts": 0.056932, + "samples_ns": [ + 14447295052, + 14287020810, + 14289543341 + ], + "samples_ts": [ + 8.85979, + 8.95918, + 8.9576 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T16:37:27Z", + "avg_ns": 42485054827, + "stddev_ns": 16486098, + "avg_ts": 3.012824, + "stddev_ts": 0.001169, + "samples_ns": [ + 42499230583, + 42466963431, + 42488970467 + ], + "samples_ts": [ + 3.01182, + 3.01411, + 3.01255 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1068 + }, + { + "timestamp_utc": "2025-12-10T16:49:06.895454+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T16:39:36Z\",\n \"avg_ns\": 14312275262,\n \"stddev_ns\": 49476531,\n \"avg_ts\": 8.943443,\n \"stddev_ts\": 0.030855,\n \"samples_ns\": [ 14369404276, 14283386048, 14284035464 ],\n \"samples_ts\": [ 8.90782, 8.96146, 8.96105 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T16:40:34Z\",\n \"avg_ns\": 170783966331,\n \"stddev_ns\": 26235330,\n \"avg_ts\": 2.997940,\n \"stddev_ts\": 0.000461,\n \"samples_ns\": [ 170760530093, 170779060683, 170812308217 ],\n \"samples_ts\": [ 2.99835, 2.99803, 2.99744 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T16:39:36Z", + "avg_ns": 14312275262, + "stddev_ns": 49476531, + "avg_ts": 8.943443, + "stddev_ts": 0.030855, + "samples_ns": [ + 14369404276, + 14283386048, + 14284035464 + ], + "samples_ts": [ + 8.90782, + 8.96146, + 8.96105 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T16:40:34Z", + "avg_ns": 170783966331, + "stddev_ns": 26235330, + "avg_ts": 2.99794, + "stddev_ts": 0.000461, + "samples_ns": [ + 170760530093, + 170779060683, + 170812308217 + ], + "samples_ts": [ + 2.99835, + 2.99803, + 2.99744 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1069 + }, + { + "timestamp_utc": "2025-12-10T16:55:05.703313+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T16:49:08Z\",\n \"avg_ns\": 57507514110,\n \"stddev_ns\": 21079647,\n \"avg_ts\": 8.903185,\n \"stddev_ts\": 0.003263,\n \"samples_ns\": [ 57489469284, 57502391482, 57530681565 ],\n \"samples_ts\": [ 8.90598, 8.90398, 8.8996 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T16:52:58Z\",\n \"avg_ns\": 42400591319,\n \"stddev_ns\": 30123417,\n \"avg_ts\": 3.018827,\n \"stddev_ts\": 0.002146,\n \"samples_ns\": [ 42366119559, 42421851327, 42413803071 ],\n \"samples_ts\": [ 3.02128, 3.01731, 3.01789 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T16:49:08Z", + "avg_ns": 57507514110, + "stddev_ns": 21079647, + "avg_ts": 8.903185, + "stddev_ts": 0.003263, + "samples_ns": [ + 57489469284, + 57502391482, + 57530681565 + ], + "samples_ts": [ + 8.90598, + 8.90398, + 8.8996 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T16:52:58Z", + "avg_ns": 42400591319, + "stddev_ns": 30123417, + "avg_ts": 3.018827, + "stddev_ts": 0.002146, + "samples_ns": [ + 42366119559, + 42421851327, + 42413803071 + ], + "samples_ts": [ + 3.02128, + 3.01731, + 3.01789 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1070 + }, + { + "timestamp_utc": "2025-12-10T17:07:31.568741+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T16:55:06Z\",\n \"avg_ns\": 57512770002,\n \"stddev_ns\": 3349860,\n \"avg_ts\": 8.902371,\n \"stddev_ts\": 0.000516,\n \"samples_ns\": [ 57516423208, 57511990830, 57509895970 ],\n \"samples_ts\": [ 8.90181, 8.90249, 8.90282 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T16:58:56Z\",\n \"avg_ns\": 171408980253,\n \"stddev_ns\": 32452774,\n \"avg_ts\": 2.987008,\n \"stddev_ts\": 0.000566,\n \"samples_ns\": [ 171405541103, 171443015641, 171378384015 ],\n \"samples_ts\": [ 2.98707, 2.98642, 2.98754 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T16:55:06Z", + "avg_ns": 57512770002, + "stddev_ns": 3349860, + "avg_ts": 8.902371, + "stddev_ts": 0.000516, + "samples_ns": [ + 57516423208, + 57511990830, + 57509895970 + ], + "samples_ts": [ + 8.90181, + 8.90249, + 8.90282 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T16:58:56Z", + "avg_ns": 171408980253, + "stddev_ns": 32452774, + "avg_ts": 2.987008, + "stddev_ts": 0.000566, + "samples_ns": [ + 171405541103, + 171443015641, + 171378384015 + ], + "samples_ts": [ + 2.98707, + 2.98642, + 2.98754 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1071 + }, + { + "timestamp_utc": "2025-12-10T17:10:37.358425+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T17:07:32Z\",\n \"avg_ns\": 14291536325,\n \"stddev_ns\": 758836,\n \"avg_ts\": 8.956350,\n \"stddev_ts\": 0.000470,\n \"samples_ns\": [ 14291377673, 14290878995, 14292352308 ],\n \"samples_ts\": [ 8.95645, 8.95676, 8.95584 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T17:08:29Z\",\n \"avg_ns\": 42345534257,\n \"stddev_ns\": 19048152,\n \"avg_ts\": 3.022751,\n \"stddev_ts\": 0.001360,\n \"samples_ns\": [ 42323790874, 42353548260, 42359263639 ],\n \"samples_ts\": [ 3.0243, 3.02218, 3.02177 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T17:07:32Z", + "avg_ns": 14291536325, + "stddev_ns": 758836, + "avg_ts": 8.95635, + "stddev_ts": 0.00047, + "samples_ns": [ + 14291377673, + 14290878995, + 14292352308 + ], + "samples_ts": [ + 8.95645, + 8.95676, + 8.95584 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T17:08:29Z", + "avg_ns": 42345534257, + "stddev_ns": 19048152, + "avg_ts": 3.022751, + "stddev_ts": 0.00136, + "samples_ns": [ + 42323790874, + 42353548260, + 42359263639 + ], + "samples_ts": [ + 3.0243, + 3.02218, + 3.02177 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1072 + }, + { + "timestamp_utc": "2025-12-10T17:20:10.548126+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T17:10:38Z\",\n \"avg_ns\": 14303505133,\n \"stddev_ns\": 1648755,\n \"avg_ts\": 8.948856,\n \"stddev_ts\": 0.001026,\n \"samples_ns\": [ 14302563055, 14305398901, 14302553445 ],\n \"samples_ts\": [ 8.94944, 8.94767, 8.94945 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T17:11:35Z\",\n \"avg_ns\": 171467178281,\n \"stddev_ns\": 37233916,\n \"avg_ts\": 2.985994,\n \"stddev_ts\": 0.000648,\n \"samples_ns\": [ 171442271916, 171449281517, 171509981410 ],\n \"samples_ts\": [ 2.98643, 2.98631, 2.98525 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T17:10:38Z", + "avg_ns": 14303505133, + "stddev_ns": 1648755, + "avg_ts": 8.948856, + "stddev_ts": 0.001026, + "samples_ns": [ + 14302563055, + 14305398901, + 14302553445 + ], + "samples_ts": [ + 8.94944, + 8.94767, + 8.94945 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T17:11:35Z", + "avg_ns": 171467178281, + "stddev_ns": 37233916, + "avg_ts": 2.985994, + "stddev_ts": 0.000648, + "samples_ns": [ + 171442271916, + 171449281517, + 171509981410 + ], + "samples_ts": [ + 2.98643, + 2.98631, + 2.98525 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1073 + }, + { + "timestamp_utc": "2025-12-10T17:26:11.132179+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T17:20:11Z\",\n \"avg_ns\": 57957789337,\n \"stddev_ns\": 1704110,\n \"avg_ts\": 8.834015,\n \"stddev_ts\": 0.000257,\n \"samples_ns\": [ 57956348424, 57959645074, 57957374514 ],\n \"samples_ts\": [ 8.83423, 8.83373, 8.83408 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T17:24:03Z\",\n \"avg_ns\": 42385695435,\n \"stddev_ns\": 23301687,\n \"avg_ts\": 3.019887,\n \"stddev_ts\": 0.001660,\n \"samples_ns\": [ 42362428521, 42385626042, 42409031742 ],\n \"samples_ts\": [ 3.02155, 3.01989, 3.01823 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T17:20:11Z", + "avg_ns": 57957789337, + "stddev_ns": 1704110, + "avg_ts": 8.834015, + "stddev_ts": 0.000257, + "samples_ns": [ + 57956348424, + 57959645074, + 57957374514 + ], + "samples_ts": [ + 8.83423, + 8.83373, + 8.83408 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T17:24:03Z", + "avg_ns": 42385695435, + "stddev_ns": 23301687, + "avg_ts": 3.019887, + "stddev_ts": 0.00166, + "samples_ns": [ + 42362428521, + 42385626042, + 42409031742 + ], + "samples_ts": [ + 3.02155, + 3.01989, + 3.01823 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1074 + }, + { + "timestamp_utc": "2025-12-10T17:38:35.806960+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T17:26:12Z\",\n \"avg_ns\": 57975464376,\n \"stddev_ns\": 1855106,\n \"avg_ts\": 8.831322,\n \"stddev_ts\": 0.000280,\n \"samples_ns\": [ 57977426479, 57975187639, 57973779011 ],\n \"samples_ts\": [ 8.83102, 8.83136, 8.83158 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T17:30:04Z\",\n \"avg_ns\": 170384357943,\n \"stddev_ns\": 24617510,\n \"avg_ts\": 3.004971,\n \"stddev_ts\": 0.000434,\n \"samples_ns\": [ 170356312148, 170394390044, 170402371638 ],\n \"samples_ts\": [ 3.00547, 3.00479, 3.00465 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T17:26:12Z", + "avg_ns": 57975464376, + "stddev_ns": 1855106, + "avg_ts": 8.831322, + "stddev_ts": 0.00028, + "samples_ns": [ + 57977426479, + 57975187639, + 57973779011 + ], + "samples_ts": [ + 8.83102, + 8.83136, + 8.83158 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T17:30:04Z", + "avg_ns": 170384357943, + "stddev_ns": 24617510, + "avg_ts": 3.004971, + "stddev_ts": 0.000434, + "samples_ns": [ + 170356312148, + 170394390044, + 170402371638 + ], + "samples_ts": [ + 3.00547, + 3.00479, + 3.00465 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1075 + }, + { + "timestamp_utc": "2025-12-10T17:41:41.950283+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T17:38:36Z\",\n \"avg_ns\": 14288014928,\n \"stddev_ns\": 824749,\n \"avg_ts\": 8.958557,\n \"stddev_ts\": 0.000512,\n \"samples_ns\": [ 14288481717, 14287072658, 14288490410 ],\n \"samples_ts\": [ 8.95826, 8.95915, 8.95826 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T17:39:34Z\",\n \"avg_ns\": 42475350109,\n \"stddev_ns\": 12176215,\n \"avg_ts\": 3.013513,\n \"stddev_ts\": 0.000864,\n \"samples_ns\": [ 42478001737, 42485982007, 42462066583 ],\n \"samples_ts\": [ 3.01332, 3.01276, 3.01446 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T17:38:36Z", + "avg_ns": 14288014928, + "stddev_ns": 824749, + "avg_ts": 8.958557, + "stddev_ts": 0.000512, + "samples_ns": [ + 14288481717, + 14287072658, + 14288490410 + ], + "samples_ts": [ + 8.95826, + 8.95915, + 8.95826 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T17:39:34Z", + "avg_ns": 42475350109, + "stddev_ns": 12176215, + "avg_ts": 3.013513, + "stddev_ts": 0.000864, + "samples_ns": [ + 42478001737, + 42485982007, + 42462066583 + ], + "samples_ts": [ + 3.01332, + 3.01276, + 3.01446 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1076 + }, + { + "timestamp_utc": "2025-12-10T17:51:13.095393+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T17:41:43Z\",\n \"avg_ns\": 14287827809,\n \"stddev_ns\": 719781,\n \"avg_ts\": 8.958675,\n \"stddev_ts\": 0.000445,\n \"samples_ns\": [ 14287026590, 14288078974, 14288377864 ],\n \"samples_ts\": [ 8.95918, 8.95852, 8.95833 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T17:42:40Z\",\n \"avg_ns\": 170805372060,\n \"stddev_ns\": 35669019,\n \"avg_ts\": 2.997564,\n \"stddev_ts\": 0.000626,\n \"samples_ns\": [ 170793006404, 170845573343, 170777536435 ],\n \"samples_ts\": [ 2.99778, 2.99686, 2.99805 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T17:41:43Z", + "avg_ns": 14287827809, + "stddev_ns": 719781, + "avg_ts": 8.958675, + "stddev_ts": 0.000445, + "samples_ns": [ + 14287026590, + 14288078974, + 14288377864 + ], + "samples_ts": [ + 8.95918, + 8.95852, + 8.95833 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T17:42:40Z", + "avg_ns": 170805372060, + "stddev_ns": 35669019, + "avg_ts": 2.997564, + "stddev_ts": 0.000626, + "samples_ns": [ + 170793006404, + 170845573343, + 170777536435 + ], + "samples_ts": [ + 2.99778, + 2.99686, + 2.99805 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1077 + }, + { + "timestamp_utc": "2025-12-10T17:57:18.586655+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T17:51:14Z\",\n \"avg_ns\": 59069575810,\n \"stddev_ns\": 3436129,\n \"avg_ts\": 8.667745,\n \"stddev_ts\": 0.000503,\n \"samples_ns\": [ 59065681090, 59070913844, 59072132497 ],\n \"samples_ts\": [ 8.66832, 8.66755, 8.66737 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T17:55:10Z\",\n \"avg_ns\": 42534502177,\n \"stddev_ns\": 6873096,\n \"avg_ts\": 3.009322,\n \"stddev_ts\": 0.000486,\n \"samples_ns\": [ 42537507467, 42526644963, 42539354103 ],\n \"samples_ts\": [ 3.00911, 3.00988, 3.00898 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T17:51:14Z", + "avg_ns": 59069575810, + "stddev_ns": 3436129, + "avg_ts": 8.667745, + "stddev_ts": 0.000503, + "samples_ns": [ + 59065681090, + 59070913844, + 59072132497 + ], + "samples_ts": [ + 8.66832, + 8.66755, + 8.66737 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T17:55:10Z", + "avg_ns": 42534502177, + "stddev_ns": 6873096, + "avg_ts": 3.009322, + "stddev_ts": 0.000486, + "samples_ns": [ + 42537507467, + 42526644963, + 42539354103 + ], + "samples_ts": [ + 3.00911, + 3.00988, + 3.00898 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1078 + }, + { + "timestamp_utc": "2025-12-10T18:09:47.882721+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T17:57:19Z\",\n \"avg_ns\": 59078358238,\n \"stddev_ns\": 3178633,\n \"avg_ts\": 8.666456,\n \"stddev_ts\": 0.000464,\n \"samples_ns\": [ 59078535377, 59081425936, 59075113403 ],\n \"samples_ts\": [ 8.66643, 8.66601, 8.66693 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T18:01:16Z\",\n \"avg_ns\": 170465250789,\n \"stddev_ns\": 41004240,\n \"avg_ts\": 3.003545,\n \"stddev_ts\": 0.000722,\n \"samples_ns\": [ 170501565105, 170473398759, 170420788505 ],\n \"samples_ts\": [ 3.0029, 3.0034, 3.00433 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T17:57:19Z", + "avg_ns": 59078358238, + "stddev_ns": 3178633, + "avg_ts": 8.666456, + "stddev_ts": 0.000464, + "samples_ns": [ + 59078535377, + 59081425936, + 59075113403 + ], + "samples_ts": [ + 8.66643, + 8.66601, + 8.66693 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T18:01:16Z", + "avg_ns": 170465250789, + "stddev_ns": 41004240, + "avg_ts": 3.003545, + "stddev_ts": 0.000722, + "samples_ns": [ + 170501565105, + 170473398759, + 170420788505 + ], + "samples_ts": [ + 3.0029, + 3.0034, + 3.00433 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1079 + }, + { + "timestamp_utc": "2025-12-10T18:12:43.372713+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T18:09:49Z\",\n \"avg_ns\": 9886821843,\n \"stddev_ns\": 2810398,\n \"avg_ts\": 12.946527,\n \"stddev_ts\": 0.003676,\n \"samples_ns\": [ 9883916134, 9887031234, 9889518163 ],\n \"samples_ts\": [ 12.9503, 12.9463, 12.943 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T18:10:28Z\",\n \"avg_ns\": 44776774807,\n \"stddev_ns\": 7874325,\n \"avg_ts\": 2.858625,\n \"stddev_ts\": 0.000502,\n \"samples_ns\": [ 44774298559, 44770442160, 44785583704 ],\n \"samples_ts\": [ 2.85878, 2.85903, 2.85806 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T18:09:49Z", + "avg_ns": 9886821843, + "stddev_ns": 2810398, + "avg_ts": 12.946527, + "stddev_ts": 0.003676, + "samples_ns": [ + 9883916134, + 9887031234, + 9889518163 + ], + "samples_ts": [ + 12.9503, + 12.9463, + 12.943 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T18:10:28Z", + "avg_ns": 44776774807, + "stddev_ns": 7874325, + "avg_ts": 2.858625, + "stddev_ts": 0.000502, + "samples_ns": [ + 44774298559, + 44770442160, + 44785583704 + ], + "samples_ts": [ + 2.85878, + 2.85903, + 2.85806 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1080 + }, + { + "timestamp_utc": "2025-12-10T18:22:26.252560+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T18:12:44Z\",\n \"avg_ns\": 9918188143,\n \"stddev_ns\": 5188327,\n \"avg_ts\": 12.905585,\n \"stddev_ts\": 0.006751,\n \"samples_ns\": [ 9912200877, 9921297458, 9921066096 ],\n \"samples_ts\": [ 12.9134, 12.9015, 12.9018 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T18:13:24Z\",\n \"avg_ns\": 180532558823,\n \"stddev_ns\": 149693998,\n \"avg_ts\": 2.836055,\n \"stddev_ts\": 0.002351,\n \"samples_ns\": [ 180389955825, 180519263738, 180688456906 ],\n \"samples_ts\": [ 2.8383, 2.83626, 2.83361 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T18:12:44Z", + "avg_ns": 9918188143, + "stddev_ns": 5188327, + "avg_ts": 12.905585, + "stddev_ts": 0.006751, + "samples_ns": [ + 9912200877, + 9921297458, + 9921066096 + ], + "samples_ts": [ + 12.9134, + 12.9015, + 12.9018 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T18:13:24Z", + "avg_ns": 180532558823, + "stddev_ns": 149693998, + "avg_ts": 2.836055, + "stddev_ts": 0.002351, + "samples_ns": [ + 180389955825, + 180519263738, + 180688456906 + ], + "samples_ts": [ + 2.8383, + 2.83626, + 2.83361 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1081 + }, + { + "timestamp_utc": "2025-12-10T18:27:21.115244+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T18:22:27Z\",\n \"avg_ns\": 39705243329,\n \"stddev_ns\": 2974399,\n \"avg_ts\": 12.895022,\n \"stddev_ts\": 0.000962,\n \"samples_ns\": [ 39701824242, 39706948175, 39706957572 ],\n \"samples_ts\": [ 12.8961, 12.8945, 12.8945 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T18:25:06Z\",\n \"avg_ns\": 44813837328,\n \"stddev_ns\": 25199822,\n \"avg_ts\": 2.856261,\n \"stddev_ts\": 0.001606,\n \"samples_ns\": [ 44800902919, 44842877899, 44797731166 ],\n \"samples_ts\": [ 2.85709, 2.85441, 2.85729 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T18:22:27Z", + "avg_ns": 39705243329, + "stddev_ns": 2974399, + "avg_ts": 12.895022, + "stddev_ts": 0.000962, + "samples_ns": [ + 39701824242, + 39706948175, + 39706957572 + ], + "samples_ts": [ + 12.8961, + 12.8945, + 12.8945 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T18:25:06Z", + "avg_ns": 44813837328, + "stddev_ns": 25199822, + "avg_ts": 2.856261, + "stddev_ts": 0.001606, + "samples_ns": [ + 44800902919, + 44842877899, + 44797731166 + ], + "samples_ts": [ + 2.85709, + 2.85441, + 2.85729 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1082 + }, + { + "timestamp_utc": "2025-12-10T18:39:03.559069+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T18:27:22Z\",\n \"avg_ns\": 39805074016,\n \"stddev_ns\": 19133789,\n \"avg_ts\": 12.862684,\n \"stddev_ts\": 0.006184,\n \"samples_ns\": [ 39783068758, 39814388832, 39817764460 ],\n \"samples_ts\": [ 12.8698, 12.8597, 12.8586 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T18:30:01Z\",\n \"avg_ns\": 180529805679,\n \"stddev_ns\": 82107790,\n \"avg_ts\": 2.836097,\n \"stddev_ts\": 0.001290,\n \"samples_ns\": [ 180597903934, 180438631260, 180552881845 ],\n \"samples_ts\": [ 2.83503, 2.83753, 2.83573 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T18:27:22Z", + "avg_ns": 39805074016, + "stddev_ns": 19133789, + "avg_ts": 12.862684, + "stddev_ts": 0.006184, + "samples_ns": [ + 39783068758, + 39814388832, + 39817764460 + ], + "samples_ts": [ + 12.8698, + 12.8597, + 12.8586 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T18:30:01Z", + "avg_ns": 180529805679, + "stddev_ns": 82107790, + "avg_ts": 2.836097, + "stddev_ts": 0.00129, + "samples_ns": [ + 180597903934, + 180438631260, + 180552881845 + ], + "samples_ts": [ + 2.83503, + 2.83753, + 2.83573 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1083 + }, + { + "timestamp_utc": "2025-12-10T18:41:59.094064+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T18:39:04Z\",\n \"avg_ns\": 9905394675,\n \"stddev_ns\": 8325195,\n \"avg_ts\": 12.922257,\n \"stddev_ts\": 0.010865,\n \"samples_ns\": [ 9909760025, 9910628655, 9895795346 ],\n \"samples_ts\": [ 12.9166, 12.9154, 12.9348 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T18:39:44Z\",\n \"avg_ns\": 44770809575,\n \"stddev_ns\": 75632968,\n \"avg_ts\": 2.859011,\n \"stddev_ts\": 0.004825,\n \"samples_ns\": [ 44727623725, 44726664096, 44858140905 ],\n \"samples_ts\": [ 2.86177, 2.86183, 2.85344 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T18:39:04Z", + "avg_ns": 9905394675, + "stddev_ns": 8325195, + "avg_ts": 12.922257, + "stddev_ts": 0.010865, + "samples_ns": [ + 9909760025, + 9910628655, + 9895795346 + ], + "samples_ts": [ + 12.9166, + 12.9154, + 12.9348 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T18:39:44Z", + "avg_ns": 44770809575, + "stddev_ns": 75632968, + "avg_ts": 2.859011, + "stddev_ts": 0.004825, + "samples_ns": [ + 44727623725, + 44726664096, + 44858140905 + ], + "samples_ts": [ + 2.86177, + 2.86183, + 2.85344 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1084 + }, + { + "timestamp_utc": "2025-12-10T18:51:43.727757+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T18:42:00Z\",\n \"avg_ns\": 9902990626,\n \"stddev_ns\": 5193890,\n \"avg_ts\": 12.925391,\n \"stddev_ts\": 0.006776,\n \"samples_ns\": [ 9900724758, 9908931468, 9899315653 ],\n \"samples_ts\": [ 12.9283, 12.9176, 12.9302 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T18:42:39Z\",\n \"avg_ns\": 181144448666,\n \"stddev_ns\": 65257951,\n \"avg_ts\": 2.826474,\n \"stddev_ts\": 0.001018,\n \"samples_ns\": [ 181122835836, 181092741867, 181217768297 ],\n \"samples_ts\": [ 2.82681, 2.82728, 2.82533 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T18:42:00Z", + "avg_ns": 9902990626, + "stddev_ns": 5193890, + "avg_ts": 12.925391, + "stddev_ts": 0.006776, + "samples_ns": [ + 9900724758, + 9908931468, + 9899315653 + ], + "samples_ts": [ + 12.9283, + 12.9176, + 12.9302 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T18:42:39Z", + "avg_ns": 181144448666, + "stddev_ns": 65257951, + "avg_ts": 2.826474, + "stddev_ts": 0.001018, + "samples_ns": [ + 181122835836, + 181092741867, + 181217768297 + ], + "samples_ts": [ + 2.82681, + 2.82728, + 2.82533 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1085 + }, + { + "timestamp_utc": "2025-12-10T18:56:44.779415+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T18:51:44Z\",\n \"avg_ns\": 39919427080,\n \"stddev_ns\": 132995758,\n \"avg_ts\": 12.825930,\n \"stddev_ts\": 0.042810,\n \"samples_ns\": [ 39766477796, 40007849909, 39983953535 ],\n \"samples_ts\": [ 12.8752, 12.7975, 12.8051 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T18:54:24Z\",\n \"avg_ns\": 46592347001,\n \"stddev_ns\": 19262325,\n \"avg_ts\": 2.747233,\n \"stddev_ts\": 0.001136,\n \"samples_ns\": [ 46573243298, 46592035943, 46611761763 ],\n \"samples_ts\": [ 2.74836, 2.74725, 2.74609 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T18:51:44Z", + "avg_ns": 39919427080, + "stddev_ns": 132995758, + "avg_ts": 12.82593, + "stddev_ts": 0.04281, + "samples_ns": [ + 39766477796, + 40007849909, + 39983953535 + ], + "samples_ts": [ + 12.8752, + 12.7975, + 12.8051 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T18:54:24Z", + "avg_ns": 46592347001, + "stddev_ns": 19262325, + "avg_ts": 2.747233, + "stddev_ts": 0.001136, + "samples_ns": [ + 46573243298, + 46592035943, + 46611761763 + ], + "samples_ts": [ + 2.74836, + 2.74725, + 2.74609 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1086 + }, + { + "timestamp_utc": "2025-12-10T19:08:50.264544+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T18:56:45Z\",\n \"avg_ns\": 40052685033,\n \"stddev_ns\": 41309116,\n \"avg_ts\": 12.783172,\n \"stddev_ts\": 0.013177,\n \"samples_ns\": [ 40036517451, 40021905990, 40099631660 ],\n \"samples_ts\": [ 12.7883, 12.793, 12.7682 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T18:59:25Z\",\n \"avg_ns\": 187982414846,\n \"stddev_ns\": 159267568,\n \"avg_ts\": 2.723660,\n \"stddev_ts\": 0.002308,\n \"samples_ns\": [ 188015527409, 188122523261, 187809193868 ],\n \"samples_ts\": [ 2.72318, 2.72163, 2.72617 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T18:56:45Z", + "avg_ns": 40052685033, + "stddev_ns": 41309116, + "avg_ts": 12.783172, + "stddev_ts": 0.013177, + "samples_ns": [ + 40036517451, + 40021905990, + 40099631660 + ], + "samples_ts": [ + 12.7883, + 12.793, + 12.7682 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T18:59:25Z", + "avg_ns": 187982414846, + "stddev_ns": 159267568, + "avg_ts": 2.72366, + "stddev_ts": 0.002308, + "samples_ns": [ + 188015527409, + 188122523261, + 187809193868 + ], + "samples_ts": [ + 2.72318, + 2.72163, + 2.72617 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1087 + }, + { + "timestamp_utc": "2025-12-10T19:11:49.335628+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T19:08:51Z\",\n \"avg_ns\": 9881794958,\n \"stddev_ns\": 3358176,\n \"avg_ts\": 12.953113,\n \"stddev_ts\": 0.004401,\n \"samples_ns\": [ 9884641282, 9878093354, 9882650239 ],\n \"samples_ts\": [ 12.9494, 12.958, 12.952 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T19:09:30Z\",\n \"avg_ns\": 45979465034,\n \"stddev_ns\": 328441418,\n \"avg_ts\": 2.783946,\n \"stddev_ts\": 0.019814,\n \"samples_ns\": [ 45843447207, 46354064940, 45740882957 ],\n \"samples_ts\": [ 2.79211, 2.76135, 2.79837 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T19:08:51Z", + "avg_ns": 9881794958, + "stddev_ns": 3358176, + "avg_ts": 12.953113, + "stddev_ts": 0.004401, + "samples_ns": [ + 9884641282, + 9878093354, + 9882650239 + ], + "samples_ts": [ + 12.9494, + 12.958, + 12.952 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T19:09:30Z", + "avg_ns": 45979465034, + "stddev_ns": 328441418, + "avg_ts": 2.783946, + "stddev_ts": 0.019814, + "samples_ns": [ + 45843447207, + 46354064940, + 45740882957 + ], + "samples_ts": [ + 2.79211, + 2.76135, + 2.79837 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1088 + }, + { + "timestamp_utc": "2025-12-10T19:21:45.562281+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T19:11:50Z\",\n \"avg_ns\": 9873665212,\n \"stddev_ns\": 7815530,\n \"avg_ts\": 12.963783,\n \"stddev_ts\": 0.010262,\n \"samples_ns\": [ 9881393885, 9873836123, 9865765628 ],\n \"samples_ts\": [ 12.9536, 12.9636, 12.9742 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T19:12:29Z\",\n \"avg_ns\": 185071929385,\n \"stddev_ns\": 1577744239,\n \"avg_ts\": 2.766627,\n \"stddev_ts\": 0.023695,\n \"samples_ns\": [ 186164397643, 183263094160, 185788296353 ],\n \"samples_ts\": [ 2.75026, 2.7938, 2.75582 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T19:11:50Z", + "avg_ns": 9873665212, + "stddev_ns": 7815530, + "avg_ts": 12.963783, + "stddev_ts": 0.010262, + "samples_ns": [ + 9881393885, + 9873836123, + 9865765628 + ], + "samples_ts": [ + 12.9536, + 12.9636, + 12.9742 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T19:12:29Z", + "avg_ns": 185071929385, + "stddev_ns": 1577744239, + "avg_ts": 2.766627, + "stddev_ts": 0.023695, + "samples_ns": [ + 186164397643, + 183263094160, + 185788296353 + ], + "samples_ts": [ + 2.75026, + 2.7938, + 2.75582 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1089 + }, + { + "timestamp_utc": "2025-12-10T19:26:50.177622+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T19:21:46Z\",\n \"avg_ns\": 40803498537,\n \"stddev_ns\": 30580561,\n \"avg_ts\": 12.547948,\n \"stddev_ts\": 0.009404,\n \"samples_ns\": [ 40773943444, 40835010572, 40801541595 ],\n \"samples_ts\": [ 12.557, 12.5383, 12.5485 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T19:24:29Z\",\n \"avg_ns\": 46606488806,\n \"stddev_ns\": 10099699,\n \"avg_ts\": 2.746399,\n \"stddev_ts\": 0.000595,\n \"samples_ns\": [ 46618150689, 46600592358, 46600723371 ],\n \"samples_ts\": [ 2.74571, 2.74675, 2.74674 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T19:21:46Z", + "avg_ns": 40803498537, + "stddev_ns": 30580561, + "avg_ts": 12.547948, + "stddev_ts": 0.009404, + "samples_ns": [ + 40773943444, + 40835010572, + 40801541595 + ], + "samples_ts": [ + 12.557, + 12.5383, + 12.5485 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T19:24:29Z", + "avg_ns": 46606488806, + "stddev_ns": 10099699, + "avg_ts": 2.746399, + "stddev_ts": 0.000595, + "samples_ns": [ + 46618150689, + 46600592358, + 46600723371 + ], + "samples_ts": [ + 2.74571, + 2.74675, + 2.74674 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1090 + }, + { + "timestamp_utc": "2025-12-10T19:38:47.027002+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T19:26:51Z\",\n \"avg_ns\": 40881721120,\n \"stddev_ns\": 15842058,\n \"avg_ts\": 12.523936,\n \"stddev_ts\": 0.004852,\n \"samples_ns\": [ 40877950633, 40899105610, 40868107119 ],\n \"samples_ts\": [ 12.5251, 12.5186, 12.5281 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T19:29:34Z\",\n \"avg_ns\": 183903835845,\n \"stddev_ns\": 2043730870,\n \"avg_ts\": 2.784800,\n \"stddev_ts\": 0.055460,\n \"samples_ns\": [ 187545596871, 183941285330, 180224625334 ],\n \"samples_ts\": [ 2.73, 2.7835, 2.8409 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T19:26:51Z", + "avg_ns": 40881721120, + "stddev_ns": 15842058, + "avg_ts": 12.523936, + "stddev_ts": 0.004852, + "samples_ns": [ + 40877950633, + 40899105610, + 40868107119 + ], + "samples_ts": [ + 12.5251, + 12.5186, + 12.5281 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T19:29:34Z", + "avg_ns": 183903835845, + "stddev_ns": 2043730870, + "avg_ts": 2.7848, + "stddev_ts": 0.05546, + "samples_ns": [ + 187545596871, + 183941285330, + 180224625334 + ], + "samples_ts": [ + 2.73, + 2.7835, + 2.8409 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1091 + }, + { + "timestamp_utc": "2025-12-10T19:41:42.512216+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T19:38:48Z\",\n \"avg_ns\": 9890321427,\n \"stddev_ns\": 1201141,\n \"avg_ts\": 12.941946,\n \"stddev_ts\": 0.001566,\n \"samples_ns\": [ 9888988941, 9890669535, 9891305806 ],\n \"samples_ts\": [ 12.9437, 12.9415, 12.9407 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T19:39:27Z\",\n \"avg_ns\": 44775874886,\n \"stddev_ns\": 15458871,\n \"avg_ts\": 2.858683,\n \"stddev_ts\": 0.000987,\n \"samples_ns\": [ 44766422113, 44767489609, 44793712937 ],\n \"samples_ts\": [ 2.85929, 2.85922, 2.85754 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T19:38:48Z", + "avg_ns": 9890321427, + "stddev_ns": 1201141, + "avg_ts": 12.941946, + "stddev_ts": 0.001566, + "samples_ns": [ + 9888988941, + 9890669535, + 9891305806 + ], + "samples_ts": [ + 12.9437, + 12.9415, + 12.9407 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T19:39:27Z", + "avg_ns": 44775874886, + "stddev_ns": 15458871, + "avg_ts": 2.858683, + "stddev_ts": 0.000987, + "samples_ns": [ + 44766422113, + 44767489609, + 44793712937 + ], + "samples_ts": [ + 2.85929, + 2.85922, + 2.85754 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1092 + }, + { + "timestamp_utc": "2025-12-10T19:51:25.601655+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T19:41:43Z\",\n \"avg_ns\": 9887525609,\n \"stddev_ns\": 3875284,\n \"avg_ts\": 12.945606,\n \"stddev_ts\": 0.005074,\n \"samples_ns\": [ 9887203348, 9891551961, 9883821518 ],\n \"samples_ts\": [ 12.946, 12.9403, 12.9505 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T19:42:23Z\",\n \"avg_ns\": 180628328521,\n \"stddev_ns\": 105898443,\n \"avg_ts\": 2.834550,\n \"stddev_ts\": 0.001662,\n \"samples_ns\": [ 180626794559, 180523205391, 180734985613 ],\n \"samples_ts\": [ 2.83457, 2.8362, 2.83288 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T19:41:43Z", + "avg_ns": 9887525609, + "stddev_ns": 3875284, + "avg_ts": 12.945606, + "stddev_ts": 0.005074, + "samples_ns": [ + 9887203348, + 9891551961, + 9883821518 + ], + "samples_ts": [ + 12.946, + 12.9403, + 12.9505 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T19:42:23Z", + "avg_ns": 180628328521, + "stddev_ns": 105898443, + "avg_ts": 2.83455, + "stddev_ts": 0.001662, + "samples_ns": [ + 180626794559, + 180523205391, + 180734985613 + ], + "samples_ts": [ + 2.83457, + 2.8362, + 2.83288 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1093 + }, + { + "timestamp_utc": "2025-12-10T19:56:20.314463+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T19:51:26Z\",\n \"avg_ns\": 39706549617,\n \"stddev_ns\": 6809119,\n \"avg_ts\": 12.894598,\n \"stddev_ts\": 0.002210,\n \"samples_ns\": [ 39712803298, 39707545035, 39699300519 ],\n \"samples_ts\": [ 12.8926, 12.8943, 12.897 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T19:54:05Z\",\n \"avg_ns\": 44793349652,\n \"stddev_ns\": 20829348,\n \"avg_ts\": 2.857567,\n \"stddev_ts\": 0.001329,\n \"samples_ns\": [ 44802861494, 44807724999, 44769462463 ],\n \"samples_ts\": [ 2.85696, 2.85665, 2.85909 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T19:51:26Z", + "avg_ns": 39706549617, + "stddev_ns": 6809119, + "avg_ts": 12.894598, + "stddev_ts": 0.00221, + "samples_ns": [ + 39712803298, + 39707545035, + 39699300519 + ], + "samples_ts": [ + 12.8926, + 12.8943, + 12.897 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T19:54:05Z", + "avg_ns": 44793349652, + "stddev_ns": 20829348, + "avg_ts": 2.857567, + "stddev_ts": 0.001329, + "samples_ns": [ + 44802861494, + 44807724999, + 44769462463 + ], + "samples_ts": [ + 2.85696, + 2.85665, + 2.85909 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1094 + }, + { + "timestamp_utc": "2025-12-10T20:08:02.019935+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T19:56:21Z\",\n \"avg_ns\": 39696232478,\n \"stddev_ns\": 22111459,\n \"avg_ts\": 12.897952,\n \"stddev_ts\": 0.007183,\n \"samples_ns\": [ 39719616326, 39693413774, 39675667336 ],\n \"samples_ts\": [ 12.8904, 12.8989, 12.9046 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T19:59:00Z\",\n \"avg_ns\": 180434163925,\n \"stddev_ns\": 42231225,\n \"avg_ts\": 2.837600,\n \"stddev_ts\": 0.000664,\n \"samples_ns\": [ 180412556527, 180482824410, 180407110839 ],\n \"samples_ts\": [ 2.83794, 2.83684, 2.83803 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T19:56:21Z", + "avg_ns": 39696232478, + "stddev_ns": 22111459, + "avg_ts": 12.897952, + "stddev_ts": 0.007183, + "samples_ns": [ + 39719616326, + 39693413774, + 39675667336 + ], + "samples_ts": [ + 12.8904, + 12.8989, + 12.9046 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T19:59:00Z", + "avg_ns": 180434163925, + "stddev_ns": 42231225, + "avg_ts": 2.8376, + "stddev_ts": 0.000664, + "samples_ns": [ + 180412556527, + 180482824410, + 180407110839 + ], + "samples_ts": [ + 2.83794, + 2.83684, + 2.83803 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1095 + }, + { + "timestamp_utc": "2025-12-10T20:10:57.610002+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T20:08:03Z\",\n \"avg_ns\": 9858579162,\n \"stddev_ns\": 51347485,\n \"avg_ts\": 12.983851,\n \"stddev_ts\": 0.067828,\n \"samples_ns\": [ 9799315002, 9886673720, 9889748766 ],\n \"samples_ts\": [ 13.0621, 12.9467, 12.9427 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T20:08:42Z\",\n \"avg_ns\": 44851255090,\n \"stddev_ns\": 13115239,\n \"avg_ts\": 2.853878,\n \"stddev_ts\": 0.000834,\n \"samples_ns\": [ 44854283336, 44836894097, 44862587839 ],\n \"samples_ts\": [ 2.85369, 2.85479, 2.85316 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T20:08:03Z", + "avg_ns": 9858579162, + "stddev_ns": 51347485, + "avg_ts": 12.983851, + "stddev_ts": 0.067828, + "samples_ns": [ + 9799315002, + 9886673720, + 9889748766 + ], + "samples_ts": [ + 13.0621, + 12.9467, + 12.9427 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T20:08:42Z", + "avg_ns": 44851255090, + "stddev_ns": 13115239, + "avg_ts": 2.853878, + "stddev_ts": 0.000834, + "samples_ns": [ + 44854283336, + 44836894097, + 44862587839 + ], + "samples_ts": [ + 2.85369, + 2.85479, + 2.85316 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1096 + }, + { + "timestamp_utc": "2025-12-10T20:20:42.156222+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T20:10:58Z\",\n \"avg_ns\": 9917002915,\n \"stddev_ns\": 7883318,\n \"avg_ts\": 12.907131,\n \"stddev_ts\": 0.010256,\n \"samples_ns\": [ 9910409850, 9925734097, 9914864799 ],\n \"samples_ts\": [ 12.9157, 12.8958, 12.9099 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T20:11:38Z\",\n \"avg_ns\": 181092217449,\n \"stddev_ns\": 175286000,\n \"avg_ts\": 2.827291,\n \"stddev_ts\": 0.002737,\n \"samples_ns\": [ 180916922456, 181092235435, 181267494456 ],\n \"samples_ts\": [ 2.83003, 2.82729, 2.82455 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T20:10:58Z", + "avg_ns": 9917002915, + "stddev_ns": 7883318, + "avg_ts": 12.907131, + "stddev_ts": 0.010256, + "samples_ns": [ + 9910409850, + 9925734097, + 9914864799 + ], + "samples_ts": [ + 12.9157, + 12.8958, + 12.9099 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T20:11:38Z", + "avg_ns": 181092217449, + "stddev_ns": 175286000, + "avg_ts": 2.827291, + "stddev_ts": 0.002737, + "samples_ns": [ + 180916922456, + 181092235435, + 181267494456 + ], + "samples_ts": [ + 2.83003, + 2.82729, + 2.82455 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1097 + }, + { + "timestamp_utc": "2025-12-10T20:25:40.303181+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T20:20:43Z\",\n \"avg_ns\": 39985149562,\n \"stddev_ns\": 75726912,\n \"avg_ts\": 12.804785,\n \"stddev_ts\": 0.024277,\n \"samples_ns\": [ 39897938886, 40023251383, 40034258417 ],\n \"samples_ts\": [ 12.8327, 12.7926, 12.789 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T20:23:22Z\",\n \"avg_ns\": 45642497697,\n \"stddev_ns\": 778460506,\n \"avg_ts\": 2.804948,\n \"stddev_ts\": 0.047841,\n \"samples_ns\": [ 46422778541, 45638844103, 44865870448 ],\n \"samples_ts\": [ 2.75727, 2.80463, 2.85295 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T20:20:43Z", + "avg_ns": 39985149562, + "stddev_ns": 75726912, + "avg_ts": 12.804785, + "stddev_ts": 0.024277, + "samples_ns": [ + 39897938886, + 40023251383, + 40034258417 + ], + "samples_ts": [ + 12.8327, + 12.7926, + 12.789 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T20:23:22Z", + "avg_ns": 45642497697, + "stddev_ns": 778460506, + "avg_ts": 2.804948, + "stddev_ts": 0.047841, + "samples_ns": [ + 46422778541, + 45638844103, + 44865870448 + ], + "samples_ts": [ + 2.75727, + 2.80463, + 2.85295 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1098 + }, + { + "timestamp_utc": "2025-12-10T20:37:26.030362+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T20:25:41Z\",\n \"avg_ns\": 40036897594,\n \"stddev_ns\": 72110855,\n \"avg_ts\": 12.788231,\n \"stddev_ts\": 0.023057,\n \"samples_ns\": [ 39953633109, 40078124294, 40078935381 ],\n \"samples_ts\": [ 12.8149, 12.775, 12.7748 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T20:28:21Z\",\n \"avg_ns\": 181325849289,\n \"stddev_ns\": 1426599355,\n \"avg_ts\": 2.823762,\n \"stddev_ts\": 0.022120,\n \"samples_ns\": [ 182965461877, 180643659426, 180368426566 ],\n \"samples_ts\": [ 2.79834, 2.83431, 2.83863 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T20:25:41Z", + "avg_ns": 40036897594, + "stddev_ns": 72110855, + "avg_ts": 12.788231, + "stddev_ts": 0.023057, + "samples_ns": [ + 39953633109, + 40078124294, + 40078935381 + ], + "samples_ts": [ + 12.8149, + 12.775, + 12.7748 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T20:28:21Z", + "avg_ns": 181325849289, + "stddev_ns": 1426599355, + "avg_ts": 2.823762, + "stddev_ts": 0.02212, + "samples_ns": [ + 182965461877, + 180643659426, + 180368426566 + ], + "samples_ts": [ + 2.79834, + 2.83431, + 2.83863 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1099 + }, + { + "timestamp_utc": "2025-12-10T20:40:22.626547+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T20:37:27Z\",\n \"avg_ns\": 9891760419,\n \"stddev_ns\": 1085543,\n \"avg_ts\": 12.940063,\n \"stddev_ts\": 0.001420,\n \"samples_ns\": [ 9892786247, 9891871334, 9890623676 ],\n \"samples_ts\": [ 12.9387, 12.9399, 12.9415 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T20:38:06Z\",\n \"avg_ns\": 45149919664,\n \"stddev_ns\": 377057526,\n \"avg_ts\": 2.835131,\n \"stddev_ts\": 0.023565,\n \"samples_ns\": [ 44913235623, 45584738906, 44951784464 ],\n \"samples_ts\": [ 2.84994, 2.80796, 2.8475 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T20:37:27Z", + "avg_ns": 9891760419, + "stddev_ns": 1085543, + "avg_ts": 12.940063, + "stddev_ts": 0.00142, + "samples_ns": [ + 9892786247, + 9891871334, + 9890623676 + ], + "samples_ts": [ + 12.9387, + 12.9399, + 12.9415 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T20:38:06Z", + "avg_ns": 45149919664, + "stddev_ns": 377057526, + "avg_ts": 2.835131, + "stddev_ts": 0.023565, + "samples_ns": [ + 44913235623, + 45584738906, + 44951784464 + ], + "samples_ts": [ + 2.84994, + 2.80796, + 2.8475 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1100 + }, + { + "timestamp_utc": "2025-12-10T20:50:07.838760+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T20:40:23Z\",\n \"avg_ns\": 9897756768,\n \"stddev_ns\": 3452844,\n \"avg_ts\": 12.932224,\n \"stddev_ts\": 0.004512,\n \"samples_ns\": [ 9900192616, 9893805326, 9899272362 ],\n \"samples_ts\": [ 12.929, 12.9374, 12.9302 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T20:41:03Z\",\n \"avg_ns\": 181341514796,\n \"stddev_ns\": 1283895578,\n \"avg_ts\": 2.823496,\n \"stddev_ts\": 0.019917,\n \"samples_ns\": [ 182807807114, 180797783484, 180418953790 ],\n \"samples_ts\": [ 2.80076, 2.83189, 2.83784 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T20:40:23Z", + "avg_ns": 9897756768, + "stddev_ns": 3452844, + "avg_ts": 12.932224, + "stddev_ts": 0.004512, + "samples_ns": [ + 9900192616, + 9893805326, + 9899272362 + ], + "samples_ts": [ + 12.929, + 12.9374, + 12.9302 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T20:41:03Z", + "avg_ns": 181341514796, + "stddev_ns": 1283895578, + "avg_ts": 2.823496, + "stddev_ts": 0.019917, + "samples_ns": [ + 182807807114, + 180797783484, + 180418953790 + ], + "samples_ts": [ + 2.80076, + 2.83189, + 2.83784 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1101 + }, + { + "timestamp_utc": "2025-12-10T20:55:12.265282+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T20:50:08Z\",\n \"avg_ns\": 40736639033,\n \"stddev_ns\": 80006691,\n \"avg_ts\": 12.568570,\n \"stddev_ts\": 0.024706,\n \"samples_ns\": [ 40646812374, 40800244833, 40762859893 ],\n \"samples_ts\": [ 12.5963, 12.5489, 12.5605 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T20:52:51Z\",\n \"avg_ns\": 46609789527,\n \"stddev_ns\": 9124333,\n \"avg_ts\": 2.746204,\n \"stddev_ts\": 0.000538,\n \"samples_ns\": [ 46610402615, 46618591855, 46600374111 ],\n \"samples_ts\": [ 2.74617, 2.74569, 2.74676 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T20:50:08Z", + "avg_ns": 40736639033, + "stddev_ns": 80006691, + "avg_ts": 12.56857, + "stddev_ts": 0.024706, + "samples_ns": [ + 40646812374, + 40800244833, + 40762859893 + ], + "samples_ts": [ + 12.5963, + 12.5489, + 12.5605 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T20:52:51Z", + "avg_ns": 46609789527, + "stddev_ns": 9124333, + "avg_ts": 2.746204, + "stddev_ts": 0.000538, + "samples_ns": [ + 46610402615, + 46618591855, + 46600374111 + ], + "samples_ts": [ + 2.74617, + 2.74569, + 2.74676 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1102 + }, + { + "timestamp_utc": "2025-12-10T21:07:07.095696+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T20:55:13Z\",\n \"avg_ns\": 40797012532,\n \"stddev_ns\": 78787537,\n \"avg_ts\": 12.549970,\n \"stddev_ts\": 0.024262,\n \"samples_ns\": [ 40833688016, 40850776508, 40706573072 ],\n \"samples_ts\": [ 12.5387, 12.5334, 12.5778 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T20:57:56Z\",\n \"avg_ns\": 183322326664,\n \"stddev_ns\": 3766502433,\n \"avg_ts\": 2.793673,\n \"stddev_ts\": 0.056827,\n \"samples_ns\": [ 187598237617, 181872833043, 180495909332 ],\n \"samples_ts\": [ 2.72924, 2.81515, 2.83663 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T20:55:13Z", + "avg_ns": 40797012532, + "stddev_ns": 78787537, + "avg_ts": 12.54997, + "stddev_ts": 0.024262, + "samples_ns": [ + 40833688016, + 40850776508, + 40706573072 + ], + "samples_ts": [ + 12.5387, + 12.5334, + 12.5778 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T20:57:56Z", + "avg_ns": 183322326664, + "stddev_ns": 3766502433, + "avg_ts": 2.793673, + "stddev_ts": 0.056827, + "samples_ns": [ + 187598237617, + 181872833043, + 180495909332 + ], + "samples_ts": [ + 2.72924, + 2.81515, + 2.83663 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1103 + }, + { + "timestamp_utc": "2025-12-10T21:10:02.913713+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T21:07:08Z\",\n \"avg_ns\": 9890912587,\n \"stddev_ns\": 6879372,\n \"avg_ts\": 12.941176,\n \"stddev_ts\": 0.009001,\n \"samples_ns\": [ 9897429602, 9891586289, 9883721871 ],\n \"samples_ts\": [ 12.9327, 12.9403, 12.9506 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T21:07:47Z\",\n \"avg_ns\": 44867454119,\n \"stddev_ns\": 15585183,\n \"avg_ts\": 2.852848,\n \"stddev_ts\": 0.000991,\n \"samples_ns\": [ 44882564137, 44851433683, 44868364537 ],\n \"samples_ts\": [ 2.85189, 2.85387, 2.85279 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T21:07:08Z", + "avg_ns": 9890912587, + "stddev_ns": 6879372, + "avg_ts": 12.941176, + "stddev_ts": 0.009001, + "samples_ns": [ + 9897429602, + 9891586289, + 9883721871 + ], + "samples_ts": [ + 12.9327, + 12.9403, + 12.9506 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T21:07:47Z", + "avg_ns": 44867454119, + "stddev_ns": 15585183, + "avg_ts": 2.852848, + "stddev_ts": 0.000991, + "samples_ns": [ + 44882564137, + 44851433683, + 44868364537 + ], + "samples_ts": [ + 2.85189, + 2.85387, + 2.85279 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1104 + }, + { + "timestamp_utc": "2025-12-10T21:19:53.900853+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T21:10:04Z\",\n \"avg_ns\": 9892808474,\n \"stddev_ns\": 2594723,\n \"avg_ts\": 12.938692,\n \"stddev_ts\": 0.003389,\n \"samples_ns\": [ 9894161859, 9889821140, 9894442425 ],\n \"samples_ts\": [ 12.9369, 12.9426, 12.9366 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T21:10:43Z\",\n \"avg_ns\": 183277282418,\n \"stddev_ns\": 3385009939,\n \"avg_ts\": 2.793705,\n \"stddev_ts\": 0.022724,\n \"samples_ns\": [ 182908584963, 184922099060, 182001163233 ],\n \"samples_ts\": [ 2.79921, 2.76873, 2.81317 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T21:10:04Z", + "avg_ns": 9892808474, + "stddev_ns": 2594723, + "avg_ts": 12.938692, + "stddev_ts": 0.003389, + "samples_ns": [ + 9894161859, + 9889821140, + 9894442425 + ], + "samples_ts": [ + 12.9369, + 12.9426, + 12.9366 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T21:10:43Z", + "avg_ns": 183277282418, + "stddev_ns": 3385009939, + "avg_ts": 2.793705, + "stddev_ts": 0.022724, + "samples_ns": [ + 182908584963, + 184922099060, + 182001163233 + ], + "samples_ts": [ + 2.79921, + 2.76873, + 2.81317 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1105 + }, + { + "timestamp_utc": "2025-12-10T21:24:50.953644+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T21:19:55Z\",\n \"avg_ns\": 39797233522,\n \"stddev_ns\": 14248592,\n \"avg_ts\": 12.865217,\n \"stddev_ts\": 0.004607,\n \"samples_ns\": [ 39806203669, 39780803795, 39804693102 ],\n \"samples_ts\": [ 12.8623, 12.8705, 12.8628 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T21:22:34Z\",\n \"avg_ns\": 45423322834,\n \"stddev_ns\": 717815685,\n \"avg_ts\": 2.818402,\n \"stddev_ts\": 0.044267,\n \"samples_ns\": [ 46221294156, 45218478062, 44830196284 ],\n \"samples_ts\": [ 2.76929, 2.8307, 2.85522 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T21:19:55Z", + "avg_ns": 39797233522, + "stddev_ns": 14248592, + "avg_ts": 12.865217, + "stddev_ts": 0.004607, + "samples_ns": [ + 39806203669, + 39780803795, + 39804693102 + ], + "samples_ts": [ + 12.8623, + 12.8705, + 12.8628 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T21:22:34Z", + "avg_ns": 45423322834, + "stddev_ns": 717815685, + "avg_ts": 2.818402, + "stddev_ts": 0.044267, + "samples_ns": [ + 46221294156, + 45218478062, + 44830196284 + ], + "samples_ts": [ + 2.76929, + 2.8307, + 2.85522 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1106 + }, + { + "timestamp_utc": "2025-12-10T21:36:46.206498+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T21:24:52Z\",\n \"avg_ns\": 39722806590,\n \"stddev_ns\": 22549939,\n \"avg_ts\": 12.889324,\n \"stddev_ts\": 0.007316,\n \"samples_ns\": [ 39746071042, 39721298313, 39701050417 ],\n \"samples_ts\": [ 12.8818, 12.8898, 12.8964 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T21:27:31Z\",\n \"avg_ns\": 184909228823,\n \"stddev_ns\": 3747722146,\n \"avg_ts\": 2.769694,\n \"stddev_ts\": 0.056794,\n \"samples_ns\": [ 187250032425, 186890953257, 180586700789 ],\n \"samples_ts\": [ 2.73431, 2.73957, 2.8352 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T21:24:52Z", + "avg_ns": 39722806590, + "stddev_ns": 22549939, + "avg_ts": 12.889324, + "stddev_ts": 0.007316, + "samples_ns": [ + 39746071042, + 39721298313, + 39701050417 + ], + "samples_ts": [ + 12.8818, + 12.8898, + 12.8964 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T21:27:31Z", + "avg_ns": 184909228823, + "stddev_ns": 3747722146, + "avg_ts": 2.769694, + "stddev_ts": 0.056794, + "samples_ns": [ + 187250032425, + 186890953257, + 180586700789 + ], + "samples_ts": [ + 2.73431, + 2.73957, + 2.8352 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1107 + }, + { + "timestamp_utc": "2025-12-10T21:39:42.175446+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T21:36:47Z\",\n \"avg_ns\": 9913957223,\n \"stddev_ns\": 3986954,\n \"avg_ts\": 12.911092,\n \"stddev_ts\": 0.005191,\n \"samples_ns\": [ 9918387751, 9910658619, 9912825299 ],\n \"samples_ts\": [ 12.9053, 12.9154, 12.9126 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T21:37:26Z\",\n \"avg_ns\": 44904649761,\n \"stddev_ns\": 44295715,\n \"avg_ts\": 2.850486,\n \"stddev_ts\": 0.002813,\n \"samples_ns\": [ 44935155420, 44853841848, 44924952015 ],\n \"samples_ts\": [ 2.84855, 2.85371, 2.8492 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T21:36:47Z", + "avg_ns": 9913957223, + "stddev_ns": 3986954, + "avg_ts": 12.911092, + "stddev_ts": 0.005191, + "samples_ns": [ + 9918387751, + 9910658619, + 9912825299 + ], + "samples_ts": [ + 12.9053, + 12.9154, + 12.9126 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T21:37:26Z", + "avg_ns": 44904649761, + "stddev_ns": 44295715, + "avg_ts": 2.850486, + "stddev_ts": 0.002813, + "samples_ns": [ + 44935155420, + 44853841848, + 44924952015 + ], + "samples_ts": [ + 2.84855, + 2.85371, + 2.8492 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1108 + }, + { + "timestamp_utc": "2025-12-10T21:49:24.721268+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T21:39:43Z\",\n \"avg_ns\": 9769493461,\n \"stddev_ns\": 13892219,\n \"avg_ts\": 13.102028,\n \"stddev_ts\": 0.018615,\n \"samples_ns\": [ 9761673678, 9761273957, 9785532749 ],\n \"samples_ts\": [ 13.1125, 13.113, 13.0805 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T21:40:22Z\",\n \"avg_ns\": 180622836513,\n \"stddev_ns\": 120552861,\n \"avg_ts\": 2.834637,\n \"stddev_ts\": 0.001891,\n \"samples_ns\": [ 180758425456, 180582325010, 180527759075 ],\n \"samples_ts\": [ 2.83251, 2.83527, 2.83613 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T21:39:43Z", + "avg_ns": 9769493461, + "stddev_ns": 13892219, + "avg_ts": 13.102028, + "stddev_ts": 0.018615, + "samples_ns": [ + 9761673678, + 9761273957, + 9785532749 + ], + "samples_ts": [ + 13.1125, + 13.113, + 13.0805 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T21:40:22Z", + "avg_ns": 180622836513, + "stddev_ns": 120552861, + "avg_ts": 2.834637, + "stddev_ts": 0.001891, + "samples_ns": [ + 180758425456, + 180582325010, + 180527759075 + ], + "samples_ts": [ + 2.83251, + 2.83527, + 2.83613 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1109 + }, + { + "timestamp_utc": "2025-12-10T21:54:26.257745+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T21:49:25Z\",\n \"avg_ns\": 39977597271,\n \"stddev_ns\": 9051351,\n \"avg_ts\": 12.807173,\n \"stddev_ts\": 0.002899,\n \"samples_ns\": [ 39988039463, 39972703990, 39972048361 ],\n \"samples_ts\": [ 12.8038, 12.8087, 12.809 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T21:52:06Z\",\n \"avg_ns\": 46546950939,\n \"stddev_ns\": 23167244,\n \"avg_ts\": 2.749912,\n \"stddev_ts\": 0.001369,\n \"samples_ns\": [ 46523639849, 46547245459, 46569967511 ],\n \"samples_ts\": [ 2.75129, 2.74989, 2.74855 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T21:49:25Z", + "avg_ns": 39977597271, + "stddev_ns": 9051351, + "avg_ts": 12.807173, + "stddev_ts": 0.002899, + "samples_ns": [ + 39988039463, + 39972703990, + 39972048361 + ], + "samples_ts": [ + 12.8038, + 12.8087, + 12.809 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T21:52:06Z", + "avg_ns": 46546950939, + "stddev_ns": 23167244, + "avg_ts": 2.749912, + "stddev_ts": 0.001369, + "samples_ns": [ + 46523639849, + 46547245459, + 46569967511 + ], + "samples_ts": [ + 2.75129, + 2.74989, + 2.74855 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1110 + }, + { + "timestamp_utc": "2025-12-10T22:06:30.856464+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T21:54:27Z\",\n \"avg_ns\": 40008911334,\n \"stddev_ns\": 4544139,\n \"avg_ts\": 12.797149,\n \"stddev_ts\": 0.001451,\n \"samples_ns\": [ 40008980369, 40004341885, 40013411750 ],\n \"samples_ts\": [ 12.7971, 12.7986, 12.7957 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T21:57:07Z\",\n \"avg_ns\": 187623823130,\n \"stddev_ns\": 55845387,\n \"avg_ts\": 2.728865,\n \"stddev_ts\": 0.000812,\n \"samples_ns\": [ 187559375560, 187657891540, 187654202291 ],\n \"samples_ts\": [ 2.7298, 2.72837, 2.72842 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T21:54:27Z", + "avg_ns": 40008911334, + "stddev_ns": 4544139, + "avg_ts": 12.797149, + "stddev_ts": 0.001451, + "samples_ns": [ + 40008980369, + 40004341885, + 40013411750 + ], + "samples_ts": [ + 12.7971, + 12.7986, + 12.7957 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T21:57:07Z", + "avg_ns": 187623823130, + "stddev_ns": 55845387, + "avg_ts": 2.728865, + "stddev_ts": 0.000812, + "samples_ns": [ + 187559375560, + 187657891540, + 187654202291 + ], + "samples_ts": [ + 2.7298, + 2.72837, + 2.72842 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1111 + }, + { + "timestamp_utc": "2025-12-10T22:09:31.381479+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T22:06:31Z\",\n \"avg_ns\": 9887829694,\n \"stddev_ns\": 7032522,\n \"avg_ts\": 12.945211,\n \"stddev_ts\": 0.009207,\n \"samples_ns\": [ 9880512336, 9888440639, 9894536108 ],\n \"samples_ts\": [ 12.9548, 12.9444, 12.9364 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T22:07:11Z\",\n \"avg_ns\": 46459027202,\n \"stddev_ns\": 277464235,\n \"avg_ts\": 2.755182,\n \"stddev_ts\": 0.016511,\n \"samples_ns\": [ 46138920516, 46630705415, 46607455676 ],\n \"samples_ts\": [ 2.77423, 2.74497, 2.74634 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T22:06:31Z", + "avg_ns": 9887829694, + "stddev_ns": 7032522, + "avg_ts": 12.945211, + "stddev_ts": 0.009207, + "samples_ns": [ + 9880512336, + 9888440639, + 9894536108 + ], + "samples_ts": [ + 12.9548, + 12.9444, + 12.9364 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T22:07:11Z", + "avg_ns": 46459027202, + "stddev_ns": 277464235, + "avg_ts": 2.755182, + "stddev_ts": 0.016511, + "samples_ns": [ + 46138920516, + 46630705415, + 46607455676 + ], + "samples_ts": [ + 2.77423, + 2.74497, + 2.74634 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1112 + }, + { + "timestamp_utc": "2025-12-10T22:19:30.134798+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T22:09:32Z\",\n \"avg_ns\": 9885792275,\n \"stddev_ns\": 4331087,\n \"avg_ts\": 12.947876,\n \"stddev_ts\": 0.005670,\n \"samples_ns\": [ 9883493095, 9890786831, 9883096900 ],\n \"samples_ts\": [ 12.9509, 12.9413, 12.9514 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T22:10:12Z\",\n \"avg_ns\": 185864118159,\n \"stddev_ns\": 2266745945,\n \"avg_ts\": 2.754975,\n \"stddev_ts\": 0.033702,\n \"samples_ns\": [ 186246982119, 183430321674, 187915050684 ],\n \"samples_ts\": [ 2.74904, 2.79125, 2.72464 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T22:09:32Z", + "avg_ns": 9885792275, + "stddev_ns": 4331087, + "avg_ts": 12.947876, + "stddev_ts": 0.00567, + "samples_ns": [ + 9883493095, + 9890786831, + 9883096900 + ], + "samples_ts": [ + 12.9509, + 12.9413, + 12.9514 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T22:10:12Z", + "avg_ns": 185864118159, + "stddev_ns": 2266745945, + "avg_ts": 2.754975, + "stddev_ts": 0.033702, + "samples_ns": [ + 186246982119, + 183430321674, + 187915050684 + ], + "samples_ts": [ + 2.74904, + 2.79125, + 2.72464 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1113 + }, + { + "timestamp_utc": "2025-12-10T22:24:34.271710+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T22:19:31Z\",\n \"avg_ns\": 40801227708,\n \"stddev_ns\": 17403332,\n \"avg_ts\": 12.548644,\n \"stddev_ts\": 0.005352,\n \"samples_ns\": [ 40820096801, 40797780147, 40785806176 ],\n \"samples_ts\": [ 12.5428, 12.5497, 12.5534 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T22:22:14Z\",\n \"avg_ns\": 46550349194,\n \"stddev_ns\": 28982504,\n \"avg_ts\": 2.749712,\n \"stddev_ts\": 0.001712,\n \"samples_ns\": [ 46527407233, 46540719573, 46582920776 ],\n \"samples_ts\": [ 2.75107, 2.75028, 2.74779 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T22:19:31Z", + "avg_ns": 40801227708, + "stddev_ns": 17403332, + "avg_ts": 12.548644, + "stddev_ts": 0.005352, + "samples_ns": [ + 40820096801, + 40797780147, + 40785806176 + ], + "samples_ts": [ + 12.5428, + 12.5497, + 12.5534 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T22:22:14Z", + "avg_ns": 46550349194, + "stddev_ns": 28982504, + "avg_ts": 2.749712, + "stddev_ts": 0.001712, + "samples_ns": [ + 46527407233, + 46540719573, + 46582920776 + ], + "samples_ts": [ + 2.75107, + 2.75028, + 2.74779 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1114 + }, + { + "timestamp_utc": "2025-12-10T22:36:34.276975+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T22:24:35Z\",\n \"avg_ns\": 40804554213,\n \"stddev_ns\": 6644612,\n \"avg_ts\": 12.547619,\n \"stddev_ts\": 0.002043,\n \"samples_ns\": [ 40811718329, 40798593613, 40803350697 ],\n \"samples_ts\": [ 12.5454, 12.5495, 12.548 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T22:27:18Z\",\n \"avg_ns\": 185055948106,\n \"stddev_ns\": 281692700,\n \"avg_ts\": 2.767234,\n \"stddev_ts\": 0.045828,\n \"samples_ns\": [ 187760161070, 185657849556, 181749833692 ],\n \"samples_ts\": [ 2.72688, 2.75776, 2.81706 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T22:24:35Z", + "avg_ns": 40804554213, + "stddev_ns": 6644612, + "avg_ts": 12.547619, + "stddev_ts": 0.002043, + "samples_ns": [ + 40811718329, + 40798593613, + 40803350697 + ], + "samples_ts": [ + 12.5454, + 12.5495, + 12.548 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T22:27:18Z", + "avg_ns": 185055948106, + "stddev_ns": 281692700, + "avg_ts": 2.767234, + "stddev_ts": 0.045828, + "samples_ns": [ + 187760161070, + 185657849556, + 181749833692 + ], + "samples_ts": [ + 2.72688, + 2.75776, + 2.81706 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1115 + }, + { + "timestamp_utc": "2025-12-10T22:39:36.726848+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T22:36:35Z\",\n \"avg_ns\": 7647674754,\n \"stddev_ns\": 10642507,\n \"avg_ts\": 16.737135,\n \"stddev_ts\": 0.023289,\n \"samples_ns\": [ 7637144800, 7647454505, 7658424959 ],\n \"samples_ts\": [ 16.7602, 16.7376, 16.7136 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T22:37:05Z\",\n \"avg_ns\": 50079938593,\n \"stddev_ns\": 55489413,\n \"avg_ts\": 2.555916,\n \"stddev_ts\": 0.002830,\n \"samples_ns\": [ 50037847755, 50142819952, 50059148074 ],\n \"samples_ts\": [ 2.55806, 2.55271, 2.55698 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T22:36:35Z", + "avg_ns": 7647674754, + "stddev_ns": 10642507, + "avg_ts": 16.737135, + "stddev_ts": 0.023289, + "samples_ns": [ + 7637144800, + 7647454505, + 7658424959 + ], + "samples_ts": [ + 16.7602, + 16.7376, + 16.7136 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T22:37:05Z", + "avg_ns": 50079938593, + "stddev_ns": 55489413, + "avg_ts": 2.555916, + "stddev_ts": 0.00283, + "samples_ns": [ + 50037847755, + 50142819952, + 50059148074 + ], + "samples_ts": [ + 2.55806, + 2.55271, + 2.55698 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1116 + }, + { + "timestamp_utc": "2025-12-10T22:50:13.802394+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T22:39:37Z\",\n \"avg_ns\": 7640483407,\n \"stddev_ns\": 5301903,\n \"avg_ts\": 16.752872,\n \"stddev_ts\": 0.011628,\n \"samples_ns\": [ 7634471286, 7644486134, 7642492802 ],\n \"samples_ts\": [ 16.7661, 16.7441, 16.7485 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T22:40:08Z\",\n \"avg_ns\": 201627549968,\n \"stddev_ns\": 131796235,\n \"avg_ts\": 2.539336,\n \"stddev_ts\": 0.001660,\n \"samples_ns\": [ 201606402298, 201507606257, 201768641349 ],\n \"samples_ts\": [ 2.5396, 2.54085, 2.53756 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T22:39:37Z", + "avg_ns": 7640483407, + "stddev_ns": 5301903, + "avg_ts": 16.752872, + "stddev_ts": 0.011628, + "samples_ns": [ + 7634471286, + 7644486134, + 7642492802 + ], + "samples_ts": [ + 16.7661, + 16.7441, + 16.7485 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T22:40:08Z", + "avg_ns": 201627549968, + "stddev_ns": 131796235, + "avg_ts": 2.539336, + "stddev_ts": 0.00166, + "samples_ns": [ + 201606402298, + 201507606257, + 201768641349 + ], + "samples_ts": [ + 2.5396, + 2.54085, + 2.53756 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1117 + }, + { + "timestamp_utc": "2025-12-10T22:54:48.027246+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T22:50:14Z\",\n \"avg_ns\": 30633156657,\n \"stddev_ns\": 14851097,\n \"avg_ts\": 16.713918,\n \"stddev_ts\": 0.008102,\n \"samples_ns\": [ 30631910740, 30648590431, 30618968801 ],\n \"samples_ts\": [ 16.7146, 16.7055, 16.7217 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T22:52:17Z\",\n \"avg_ns\": 50025953000,\n \"stddev_ns\": 28811560,\n \"avg_ts\": 2.558672,\n \"stddev_ts\": 0.001473,\n \"samples_ns\": [ 50000539437, 50057253305, 50020066258 ],\n \"samples_ts\": [ 2.55997, 2.55707, 2.55897 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T22:50:14Z", + "avg_ns": 30633156657, + "stddev_ns": 14851097, + "avg_ts": 16.713918, + "stddev_ts": 0.008102, + "samples_ns": [ + 30631910740, + 30648590431, + 30618968801 + ], + "samples_ts": [ + 16.7146, + 16.7055, + 16.7217 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T22:52:17Z", + "avg_ns": 50025953000, + "stddev_ns": 28811560, + "avg_ts": 2.558672, + "stddev_ts": 0.001473, + "samples_ns": [ + 50000539437, + 50057253305, + 50020066258 + ], + "samples_ts": [ + 2.55997, + 2.55707, + 2.55897 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1118 + }, + { + "timestamp_utc": "2025-12-10T23:06:55.543453+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T22:54:49Z\",\n \"avg_ns\": 30656284673,\n \"stddev_ns\": 19216663,\n \"avg_ts\": 16.701311,\n \"stddev_ts\": 0.010466,\n \"samples_ns\": [ 30677971139, 30641372894, 30649509986 ],\n \"samples_ts\": [ 16.6895, 16.7094, 16.705 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T22:56:51Z\",\n \"avg_ns\": 201086688511,\n \"stddev_ns\": 84695206,\n \"avg_ts\": 2.546166,\n \"stddev_ts\": 0.001072,\n \"samples_ns\": [ 201022176186, 201055291733, 201182597615 ],\n \"samples_ts\": [ 2.54698, 2.54656, 2.54495 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T22:54:49Z", + "avg_ns": 30656284673, + "stddev_ns": 19216663, + "avg_ts": 16.701311, + "stddev_ts": 0.010466, + "samples_ns": [ + 30677971139, + 30641372894, + 30649509986 + ], + "samples_ts": [ + 16.6895, + 16.7094, + 16.705 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T22:56:51Z", + "avg_ns": 201086688511, + "stddev_ns": 84695206, + "avg_ts": 2.546166, + "stddev_ts": 0.001072, + "samples_ns": [ + 201022176186, + 201055291733, + 201182597615 + ], + "samples_ts": [ + 2.54698, + 2.54656, + 2.54495 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1119 + }, + { + "timestamp_utc": "2025-12-10T23:09:57.922256+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T23:06:56Z\",\n \"avg_ns\": 7650567785,\n \"stddev_ns\": 6652217,\n \"avg_ts\": 16.730793,\n \"stddev_ts\": 0.014547,\n \"samples_ns\": [ 7657235561, 7650536558, 7643931236 ],\n \"samples_ts\": [ 16.7162, 16.7309, 16.7453 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T23:07:27Z\",\n \"avg_ns\": 50040387361,\n \"stddev_ns\": 72093571,\n \"avg_ts\": 2.557937,\n \"stddev_ts\": 0.003685,\n \"samples_ns\": [ 49970090018, 50036921326, 50114150741 ],\n \"samples_ts\": [ 2.56153, 2.55811, 2.55417 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T23:06:56Z", + "avg_ns": 7650567785, + "stddev_ns": 6652217, + "avg_ts": 16.730793, + "stddev_ts": 0.014547, + "samples_ns": [ + 7657235561, + 7650536558, + 7643931236 + ], + "samples_ts": [ + 16.7162, + 16.7309, + 16.7453 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T23:07:27Z", + "avg_ns": 50040387361, + "stddev_ns": 72093571, + "avg_ts": 2.557937, + "stddev_ts": 0.003685, + "samples_ns": [ + 49970090018, + 50036921326, + 50114150741 + ], + "samples_ts": [ + 2.56153, + 2.55811, + 2.55417 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1120 + }, + { + "timestamp_utc": "2025-12-10T23:20:33.424282+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T23:09:59Z\",\n \"avg_ns\": 7633439262,\n \"stddev_ns\": 8799326,\n \"avg_ts\": 16.768341,\n \"stddev_ts\": 0.019315,\n \"samples_ns\": [ 7628527538, 7643597014, 7628193236 ],\n \"samples_ts\": [ 16.7791, 16.746, 16.7799 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T23:10:29Z\",\n \"avg_ns\": 201102784385,\n \"stddev_ns\": 58421945,\n \"avg_ts\": 2.545962,\n \"stddev_ts\": 0.000740,\n \"samples_ns\": [ 201162968929, 201099075575, 201046308653 ],\n \"samples_ts\": [ 2.5452, 2.54601, 2.54668 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T23:09:59Z", + "avg_ns": 7633439262, + "stddev_ns": 8799326, + "avg_ts": 16.768341, + "stddev_ts": 0.019315, + "samples_ns": [ + 7628527538, + 7643597014, + 7628193236 + ], + "samples_ts": [ + 16.7791, + 16.746, + 16.7799 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T23:10:29Z", + "avg_ns": 201102784385, + "stddev_ns": 58421945, + "avg_ts": 2.545962, + "stddev_ts": 0.00074, + "samples_ns": [ + 201162968929, + 201099075575, + 201046308653 + ], + "samples_ts": [ + 2.5452, + 2.54601, + 2.54668 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1121 + }, + { + "timestamp_utc": "2025-12-10T23:25:08.181170+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T23:20:34Z\",\n \"avg_ns\": 30812739799,\n \"stddev_ns\": 53754060,\n \"avg_ts\": 16.616537,\n \"stddev_ts\": 0.028960,\n \"samples_ns\": [ 30874491591, 30776429186, 30787298620 ],\n \"samples_ts\": [ 16.5833, 16.6361, 16.6302 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T23:22:37Z\",\n \"avg_ns\": 49956697832,\n \"stddev_ns\": 44185311,\n \"avg_ts\": 2.562220,\n \"stddev_ts\": 0.002265,\n \"samples_ns\": [ 49932294896, 50007701548, 49930097054 ],\n \"samples_ts\": [ 2.56347, 2.55961, 2.56358 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T23:20:34Z", + "avg_ns": 30812739799, + "stddev_ns": 53754060, + "avg_ts": 16.616537, + "stddev_ts": 0.02896, + "samples_ns": [ + 30874491591, + 30776429186, + 30787298620 + ], + "samples_ts": [ + 16.5833, + 16.6361, + 16.6302 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T23:22:37Z", + "avg_ns": 49956697832, + "stddev_ns": 44185311, + "avg_ts": 2.56222, + "stddev_ts": 0.002265, + "samples_ns": [ + 49932294896, + 50007701548, + 49930097054 + ], + "samples_ts": [ + 2.56347, + 2.55961, + 2.56358 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1122 + }, + { + "timestamp_utc": "2025-12-10T23:37:15.815469+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T23:25:09Z\",\n \"avg_ns\": 30877004354,\n \"stddev_ns\": 5152757,\n \"avg_ts\": 16.581920,\n \"stddev_ts\": 0.002764,\n \"samples_ns\": [ 30882616354, 30872504764, 30875891946 ],\n \"samples_ts\": [ 16.5789, 16.5843, 16.5825 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T23:27:12Z\",\n \"avg_ns\": 200805118624,\n \"stddev_ns\": 84987439,\n \"avg_ts\": 2.549736,\n \"stddev_ts\": 0.001079,\n \"samples_ns\": [ 200714943607, 200816679819, 200883732447 ],\n \"samples_ts\": [ 2.55088, 2.54959, 2.54874 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T23:25:09Z", + "avg_ns": 30877004354, + "stddev_ns": 5152757, + "avg_ts": 16.58192, + "stddev_ts": 0.002764, + "samples_ns": [ + 30882616354, + 30872504764, + 30875891946 + ], + "samples_ts": [ + 16.5789, + 16.5843, + 16.5825 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T23:27:12Z", + "avg_ns": 200805118624, + "stddev_ns": 84987439, + "avg_ts": 2.549736, + "stddev_ts": 0.001079, + "samples_ns": [ + 200714943607, + 200816679819, + 200883732447 + ], + "samples_ts": [ + 2.55088, + 2.54959, + 2.54874 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1123 + }, + { + "timestamp_utc": "2025-12-10T23:40:18.076874+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T23:37:16Z\",\n \"avg_ns\": 7634413459,\n \"stddev_ns\": 2260235,\n \"avg_ts\": 16.766187,\n \"stddev_ts\": 0.004960,\n \"samples_ns\": [ 7636705387, 7632189844, 7634345147 ],\n \"samples_ts\": [ 16.7612, 16.7711, 16.7663 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T23:37:47Z\",\n \"avg_ns\": 50017485523,\n \"stddev_ns\": 16238458,\n \"avg_ts\": 2.559105,\n \"stddev_ts\": 0.000831,\n \"samples_ns\": [ 50004005107, 50035508297, 50012943167 ],\n \"samples_ts\": [ 2.55979, 2.55818, 2.55934 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T23:37:16Z", + "avg_ns": 7634413459, + "stddev_ns": 2260235, + "avg_ts": 16.766187, + "stddev_ts": 0.00496, + "samples_ns": [ + 7636705387, + 7632189844, + 7634345147 + ], + "samples_ts": [ + 16.7612, + 16.7711, + 16.7663 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T23:37:47Z", + "avg_ns": 50017485523, + "stddev_ns": 16238458, + "avg_ts": 2.559105, + "stddev_ts": 0.000831, + "samples_ns": [ + 50004005107, + 50035508297, + 50012943167 + ], + "samples_ts": [ + 2.55979, + 2.55818, + 2.55934 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1124 + }, + { + "timestamp_utc": "2025-12-10T23:50:53.034116+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T23:40:19Z\",\n \"avg_ns\": 7635763119,\n \"stddev_ns\": 5702123,\n \"avg_ts\": 16.763229,\n \"stddev_ts\": 0.012515,\n \"samples_ns\": [ 7641437728, 7635815116, 7630036515 ],\n \"samples_ts\": [ 16.7508, 16.7631, 16.7758 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T23:40:49Z\",\n \"avg_ns\": 200919262337,\n \"stddev_ns\": 88549722,\n \"avg_ts\": 2.548288,\n \"stddev_ts\": 0.001123,\n \"samples_ns\": [ 200842021527, 200899865121, 201015900365 ],\n \"samples_ts\": [ 2.54927, 2.54853, 2.54706 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T23:40:19Z", + "avg_ns": 7635763119, + "stddev_ns": 5702123, + "avg_ts": 16.763229, + "stddev_ts": 0.012515, + "samples_ns": [ + 7641437728, + 7635815116, + 7630036515 + ], + "samples_ts": [ + 16.7508, + 16.7631, + 16.7758 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T23:40:49Z", + "avg_ns": 200919262337, + "stddev_ns": 88549722, + "avg_ts": 2.548288, + "stddev_ts": 0.001123, + "samples_ns": [ + 200842021527, + 200899865121, + 201015900365 + ], + "samples_ts": [ + 2.54927, + 2.54853, + 2.54706 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1125 + }, + { + "timestamp_utc": "2025-12-10T23:55:30.414900+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T23:50:54Z\",\n \"avg_ns\": 31475697300,\n \"stddev_ns\": 137718358,\n \"avg_ts\": 16.266726,\n \"stddev_ts\": 0.071142,\n \"samples_ns\": [ 31342979170, 31466189993, 31617922739 ],\n \"samples_ts\": [ 16.3354, 16.2714, 16.1933 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T23:52:59Z\",\n \"avg_ns\": 49971199482,\n \"stddev_ns\": 13394274,\n \"avg_ts\": 2.561476,\n \"stddev_ts\": 0.000686,\n \"samples_ns\": [ 49956140823, 49975690024, 49981767601 ],\n \"samples_ts\": [ 2.56225, 2.56125, 2.56093 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T23:50:54Z", + "avg_ns": 31475697300, + "stddev_ns": 137718358, + "avg_ts": 16.266726, + "stddev_ts": 0.071142, + "samples_ns": [ + 31342979170, + 31466189993, + 31617922739 + ], + "samples_ts": [ + 16.3354, + 16.2714, + 16.1933 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-10T23:52:59Z", + "avg_ns": 49971199482, + "stddev_ns": 13394274, + "avg_ts": 2.561476, + "stddev_ts": 0.000686, + "samples_ns": [ + 49956140823, + 49975690024, + 49981767601 + ], + "samples_ts": [ + 2.56225, + 2.56125, + 2.56093 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1126 + }, + { + "timestamp_utc": "2025-12-11T00:07:39.988346+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T23:55:31Z\",\n \"avg_ns\": 31529998624,\n \"stddev_ns\": 158279010,\n \"avg_ts\": 16.238777,\n \"stddev_ts\": 0.081685,\n \"samples_ns\": [ 31353507177, 31577130820, 31659357875 ],\n \"samples_ts\": [ 16.3299, 16.2143, 16.1722 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-10T23:57:37Z\",\n \"avg_ns\": 200650068054,\n \"stddev_ns\": 735301058,\n \"avg_ts\": 2.551729,\n \"stddev_ts\": 0.009361,\n \"samples_ns\": [ 201302988081, 200793649768, 199853566313 ],\n \"samples_ts\": [ 2.54343, 2.54988, 2.56188 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-10T23:55:31Z", + "avg_ns": 31529998624, + "stddev_ns": 158279010, + "avg_ts": 16.238777, + "stddev_ts": 0.081685, + "samples_ns": [ + 31353507177, + 31577130820, + 31659357875 + ], + "samples_ts": [ + 16.3299, + 16.2143, + 16.1722 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-10T23:57:37Z", + "avg_ns": 200650068054, + "stddev_ns": 735301058, + "avg_ts": 2.551729, + "stddev_ts": 0.009361, + "samples_ns": [ + 201302988081, + 200793649768, + 199853566313 + ], + "samples_ts": [ + 2.54343, + 2.54988, + 2.56188 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1127 + }, + { + "timestamp_utc": "2025-12-11T00:10:42.086576+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T00:07:41Z\",\n \"avg_ns\": 7638830472,\n \"stddev_ns\": 7804236,\n \"avg_ts\": 16.756503,\n \"stddev_ts\": 0.017126,\n \"samples_ns\": [ 7629929657, 7642067436, 7644494325 ],\n \"samples_ts\": [ 16.776, 16.7494, 16.7441 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T00:08:11Z\",\n \"avg_ns\": 49971484449,\n \"stddev_ns\": 97252139,\n \"avg_ts\": 2.561467,\n \"stddev_ts\": 0.004988,\n \"samples_ns\": [ 49865366318, 49992730227, 50056356804 ],\n \"samples_ts\": [ 2.56691, 2.56037, 2.55712 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T00:07:41Z", + "avg_ns": 7638830472, + "stddev_ns": 7804236, + "avg_ts": 16.756503, + "stddev_ts": 0.017126, + "samples_ns": [ + 7629929657, + 7642067436, + 7644494325 + ], + "samples_ts": [ + 16.776, + 16.7494, + 16.7441 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T00:08:11Z", + "avg_ns": 49971484449, + "stddev_ns": 97252139, + "avg_ts": 2.561467, + "stddev_ts": 0.004988, + "samples_ns": [ + 49865366318, + 49992730227, + 50056356804 + ], + "samples_ts": [ + 2.56691, + 2.56037, + 2.55712 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1128 + }, + { + "timestamp_utc": "2025-12-11T00:21:17.088402+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T00:10:43Z\",\n \"avg_ns\": 7630998725,\n \"stddev_ns\": 3068989,\n \"avg_ts\": 16.773691,\n \"stddev_ts\": 0.006745,\n \"samples_ns\": [ 7632286806, 7633212429, 7627496941 ],\n \"samples_ts\": [ 16.7709, 16.7688, 16.7814 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T00:11:13Z\",\n \"avg_ns\": 200941842162,\n \"stddev_ns\": 91946281,\n \"avg_ts\": 2.548001,\n \"stddev_ts\": 0.001166,\n \"samples_ns\": [ 200883032774, 200894696157, 201047797556 ],\n \"samples_ts\": [ 2.54875, 2.5486, 2.54666 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T00:10:43Z", + "avg_ns": 7630998725, + "stddev_ns": 3068989, + "avg_ts": 16.773691, + "stddev_ts": 0.006745, + "samples_ns": [ + 7632286806, + 7633212429, + 7627496941 + ], + "samples_ts": [ + 16.7709, + 16.7688, + 16.7814 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T00:11:13Z", + "avg_ns": 200941842162, + "stddev_ns": 91946281, + "avg_ts": 2.548001, + "stddev_ts": 0.001166, + "samples_ns": [ + 200883032774, + 200894696157, + 201047797556 + ], + "samples_ts": [ + 2.54875, + 2.5486, + 2.54666 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1129 + }, + { + "timestamp_utc": "2025-12-11T00:25:51.136984+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T00:21:18Z\",\n \"avg_ns\": 30635815542,\n \"stddev_ns\": 21618650,\n \"avg_ts\": 16.712471,\n \"stddev_ts\": 0.011797,\n \"samples_ns\": [ 30648951830, 30647629000, 30610865798 ],\n \"samples_ts\": [ 16.7053, 16.706, 16.7261 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T00:23:20Z\",\n \"avg_ns\": 49941262458,\n \"stddev_ns\": 35347156,\n \"avg_ts\": 2.563012,\n \"stddev_ts\": 0.001815,\n \"samples_ns\": [ 49901802670, 49970023708, 49951960997 ],\n \"samples_ts\": [ 2.56504, 2.56154, 2.56246 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T00:21:18Z", + "avg_ns": 30635815542, + "stddev_ns": 21618650, + "avg_ts": 16.712471, + "stddev_ts": 0.011797, + "samples_ns": [ + 30648951830, + 30647629000, + 30610865798 + ], + "samples_ts": [ + 16.7053, + 16.706, + 16.7261 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T00:23:20Z", + "avg_ns": 49941262458, + "stddev_ns": 35347156, + "avg_ts": 2.563012, + "stddev_ts": 0.001815, + "samples_ns": [ + 49901802670, + 49970023708, + 49951960997 + ], + "samples_ts": [ + 2.56504, + 2.56154, + 2.56246 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1130 + }, + { + "timestamp_utc": "2025-12-11T00:37:37.542340+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T00:25:52Z\",\n \"avg_ns\": 30584258390,\n \"stddev_ns\": 4609355,\n \"avg_ts\": 16.740638,\n \"stddev_ts\": 0.002523,\n \"samples_ns\": [ 30588539333, 30584856769, 30579379068 ],\n \"samples_ts\": [ 16.7383, 16.7403, 16.7433 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T00:27:54Z\",\n \"avg_ns\": 194139920981,\n \"stddev_ns\": 1397297804,\n \"avg_ts\": 2.637364,\n \"stddev_ts\": 0.018906,\n \"samples_ns\": [ 195748394946, 193225912497, 193445455500 ],\n \"samples_ts\": [ 2.6156, 2.64975, 2.64674 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T00:25:52Z", + "avg_ns": 30584258390, + "stddev_ns": 4609355, + "avg_ts": 16.740638, + "stddev_ts": 0.002523, + "samples_ns": [ + 30588539333, + 30584856769, + 30579379068 + ], + "samples_ts": [ + 16.7383, + 16.7403, + 16.7433 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T00:27:54Z", + "avg_ns": 194139920981, + "stddev_ns": 1397297804, + "avg_ts": 2.637364, + "stddev_ts": 0.018906, + "samples_ns": [ + 195748394946, + 193225912497, + 193445455500 + ], + "samples_ts": [ + 2.6156, + 2.64975, + 2.64674 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1131 + }, + { + "timestamp_utc": "2025-12-11T00:40:38.575941+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T00:37:38Z\",\n \"avg_ns\": 7646694218,\n \"stddev_ns\": 3795546,\n \"avg_ts\": 16.739262,\n \"stddev_ts\": 0.008308,\n \"samples_ns\": [ 7650031472, 7647484367, 7642566816 ],\n \"samples_ts\": [ 16.732, 16.7375, 16.7483 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T00:38:09Z\",\n \"avg_ns\": 49607841184,\n \"stddev_ns\": 473591824,\n \"avg_ts\": 2.580395,\n \"stddev_ts\": 0.024768,\n \"samples_ns\": [ 49848489412, 49912788090, 49062246052 ],\n \"samples_ts\": [ 2.56778, 2.56447, 2.60893 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T00:37:38Z", + "avg_ns": 7646694218, + "stddev_ns": 3795546, + "avg_ts": 16.739262, + "stddev_ts": 0.008308, + "samples_ns": [ + 7650031472, + 7647484367, + 7642566816 + ], + "samples_ts": [ + 16.732, + 16.7375, + 16.7483 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T00:38:09Z", + "avg_ns": 49607841184, + "stddev_ns": 473591824, + "avg_ts": 2.580395, + "stddev_ts": 0.024768, + "samples_ns": [ + 49848489412, + 49912788090, + 49062246052 + ], + "samples_ts": [ + 2.56778, + 2.56447, + 2.60893 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1132 + }, + { + "timestamp_utc": "2025-12-11T00:51:05.570143+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T00:40:39Z\",\n \"avg_ns\": 7628493364,\n \"stddev_ns\": 1848475,\n \"avg_ts\": 16.779199,\n \"stddev_ts\": 0.004057,\n \"samples_ns\": [ 7629706977, 7626370985, 7629402132 ],\n \"samples_ts\": [ 16.7765, 16.7839, 16.7772 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T00:41:10Z\",\n \"avg_ns\": 198278500906,\n \"stddev_ns\": 3855045812,\n \"avg_ts\": 2.582472,\n \"stddev_ts\": 0.030745,\n \"samples_ns\": [ 200974207774, 197364097755, 196497197191 ],\n \"samples_ts\": [ 2.54759, 2.59419, 2.60564 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T00:40:39Z", + "avg_ns": 7628493364, + "stddev_ns": 1848475, + "avg_ts": 16.779199, + "stddev_ts": 0.004057, + "samples_ns": [ + 7629706977, + 7626370985, + 7629402132 + ], + "samples_ts": [ + 16.7765, + 16.7839, + 16.7772 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T00:41:10Z", + "avg_ns": 198278500906, + "stddev_ns": 3855045812, + "avg_ts": 2.582472, + "stddev_ts": 0.030745, + "samples_ns": [ + 200974207774, + 197364097755, + 196497197191 + ], + "samples_ts": [ + 2.54759, + 2.59419, + 2.60564 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1133 + }, + { + "timestamp_utc": "2025-12-11T00:55:39.167054+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T00:51:06Z\",\n \"avg_ns\": 30785894986,\n \"stddev_ns\": 8588980,\n \"avg_ts\": 16.630994,\n \"stddev_ts\": 0.004639,\n \"samples_ns\": [ 30792721766, 30776253747, 30788709446 ],\n \"samples_ts\": [ 16.6273, 16.6362, 16.6295 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T00:53:09Z\",\n \"avg_ns\": 49595050861,\n \"stddev_ns\": 538709562,\n \"avg_ts\": 2.581107,\n \"stddev_ts\": 0.028213,\n \"samples_ns\": [ 49901399059, 49910727516, 48973026009 ],\n \"samples_ts\": [ 2.56506, 2.56458, 2.61368 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T00:51:06Z", + "avg_ns": 30785894986, + "stddev_ns": 8588980, + "avg_ts": 16.630994, + "stddev_ts": 0.004639, + "samples_ns": [ + 30792721766, + 30776253747, + 30788709446 + ], + "samples_ts": [ + 16.6273, + 16.6362, + 16.6295 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T00:53:09Z", + "avg_ns": 49595050861, + "stddev_ns": 538709562, + "avg_ts": 2.581107, + "stddev_ts": 0.028213, + "samples_ns": [ + 49901399059, + 49910727516, + 48973026009 + ], + "samples_ts": [ + 2.56506, + 2.56458, + 2.61368 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1134 + }, + { + "timestamp_utc": "2025-12-11T01:07:30.165309+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T00:55:40Z\",\n \"avg_ns\": 30764043750,\n \"stddev_ns\": 8579008,\n \"avg_ts\": 16.642806,\n \"stddev_ts\": 0.004640,\n \"samples_ns\": [ 30762400125, 30773323843, 30756407283 ],\n \"samples_ts\": [ 16.6437, 16.6378, 16.6469 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T00:57:43Z\",\n \"avg_ns\": 195392272297,\n \"stddev_ns\": 3980101918,\n \"avg_ts\": 2.621086,\n \"stddev_ts\": 0.052773,\n \"samples_ns\": [ 199985737669, 193223086428, 192967992794 ],\n \"samples_ts\": [ 2.56018, 2.64979, 2.65329 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T00:55:40Z", + "avg_ns": 30764043750, + "stddev_ns": 8579008, + "avg_ts": 16.642806, + "stddev_ts": 0.00464, + "samples_ns": [ + 30762400125, + 30773323843, + 30756407283 + ], + "samples_ts": [ + 16.6437, + 16.6378, + 16.6469 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T00:57:43Z", + "avg_ns": 195392272297, + "stddev_ns": 3980101918, + "avg_ts": 2.621086, + "stddev_ts": 0.052773, + "samples_ns": [ + 199985737669, + 193223086428, + 192967992794 + ], + "samples_ts": [ + 2.56018, + 2.64979, + 2.65329 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1135 + }, + { + "timestamp_utc": "2025-12-11T01:10:29.861548+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T01:07:31Z\",\n \"avg_ns\": 7634091298,\n \"stddev_ns\": 6961445,\n \"avg_ts\": 16.766903,\n \"stddev_ts\": 0.015294,\n \"samples_ns\": [ 7626134769, 7639052760, 7637086367 ],\n \"samples_ts\": [ 16.7844, 16.756, 16.7603 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T01:08:01Z\",\n \"avg_ns\": 49168462158,\n \"stddev_ns\": 1044574448,\n \"avg_ts\": 2.604088,\n \"stddev_ts\": 0.056007,\n \"samples_ns\": [ 49805695715, 49736742117, 47962948642 ],\n \"samples_ts\": [ 2.56999, 2.57355, 2.66873 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T01:07:31Z", + "avg_ns": 7634091298, + "stddev_ns": 6961445, + "avg_ts": 16.766903, + "stddev_ts": 0.015294, + "samples_ns": [ + 7626134769, + 7639052760, + 7637086367 + ], + "samples_ts": [ + 16.7844, + 16.756, + 16.7603 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T01:08:01Z", + "avg_ns": 49168462158, + "stddev_ns": 1044574448, + "avg_ts": 2.604088, + "stddev_ts": 0.056007, + "samples_ns": [ + 49805695715, + 49736742117, + 47962948642 + ], + "samples_ts": [ + 2.56999, + 2.57355, + 2.66873 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1136 + }, + { + "timestamp_utc": "2025-12-11T01:20:46.010267+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T01:10:30Z\",\n \"avg_ns\": 7630301112,\n \"stddev_ns\": 3098051,\n \"avg_ts\": 16.775224,\n \"stddev_ts\": 0.006806,\n \"samples_ns\": [ 7633132174, 7630775419, 7626995745 ],\n \"samples_ts\": [ 16.769, 16.7742, 16.7825 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T01:11:01Z\",\n \"avg_ns\": 194662457468,\n \"stddev_ns\": 3810917266,\n \"avg_ts\": 2.630437,\n \"stddev_ts\": 0.030897,\n \"samples_ns\": [ 197320705433, 193330821936, 193335845037 ],\n \"samples_ts\": [ 2.59476, 2.64831, 2.64824 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T01:10:30Z", + "avg_ns": 7630301112, + "stddev_ns": 3098051, + "avg_ts": 16.775224, + "stddev_ts": 0.006806, + "samples_ns": [ + 7633132174, + 7630775419, + 7626995745 + ], + "samples_ts": [ + 16.769, + 16.7742, + 16.7825 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T01:11:01Z", + "avg_ns": 194662457468, + "stddev_ns": 3810917266, + "avg_ts": 2.630437, + "stddev_ts": 0.030897, + "samples_ns": [ + 197320705433, + 193330821936, + 193335845037 + ], + "samples_ts": [ + 2.59476, + 2.64831, + 2.64824 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1137 + }, + { + "timestamp_utc": "2025-12-11T01:25:21.282228+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T01:20:47Z\",\n \"avg_ns\": 31374152511,\n \"stddev_ns\": 19142770,\n \"avg_ts\": 16.319170,\n \"stddev_ts\": 0.009956,\n \"samples_ns\": [ 31372198935, 31394197160, 31356061438 ],\n \"samples_ts\": [ 16.3202, 16.3087, 16.3286 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T01:22:52Z\",\n \"avg_ns\": 49384173690,\n \"stddev_ns\": 1004817551,\n \"avg_ts\": 2.592648,\n \"stddev_ts\": 0.053380,\n \"samples_ns\": [ 49960362651, 49968239130, 48223919291 ],\n \"samples_ts\": [ 2.56203, 2.56163, 2.65428 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T01:20:47Z", + "avg_ns": 31374152511, + "stddev_ns": 19142770, + "avg_ts": 16.31917, + "stddev_ts": 0.009956, + "samples_ns": [ + 31372198935, + 31394197160, + 31356061438 + ], + "samples_ts": [ + 16.3202, + 16.3087, + 16.3286 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T01:22:52Z", + "avg_ns": 49384173690, + "stddev_ns": 1004817551, + "avg_ts": 2.592648, + "stddev_ts": 0.05338, + "samples_ns": [ + 49960362651, + 49968239130, + 48223919291 + ], + "samples_ts": [ + 2.56203, + 2.56163, + 2.65428 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1138 + }, + { + "timestamp_utc": "2025-12-11T01:37:12.683532+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T01:25:22Z\",\n \"avg_ns\": 31393662959,\n \"stddev_ns\": 8732379,\n \"avg_ts\": 16.309025,\n \"stddev_ts\": 0.004536,\n \"samples_ns\": [ 31398235478, 31383595819, 31399157581 ],\n \"samples_ts\": [ 16.3066, 16.3143, 16.3062 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T01:27:28Z\",\n \"avg_ns\": 194709982048,\n \"stddev_ns\": 3777504221,\n \"avg_ts\": 2.629784,\n \"stddev_ts\": 0.030145,\n \"samples_ns\": [ 197295004763, 193602968076, 193231973305 ],\n \"samples_ts\": [ 2.5951, 2.64459, 2.64967 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T01:25:22Z", + "avg_ns": 31393662959, + "stddev_ns": 8732379, + "avg_ts": 16.309025, + "stddev_ts": 0.004536, + "samples_ns": [ + 31398235478, + 31383595819, + 31399157581 + ], + "samples_ts": [ + 16.3066, + 16.3143, + 16.3062 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T01:27:28Z", + "avg_ns": 194709982048, + "stddev_ns": 3777504221, + "avg_ts": 2.629784, + "stddev_ts": 0.030145, + "samples_ns": [ + 197295004763, + 193602968076, + 193231973305 + ], + "samples_ts": [ + 2.5951, + 2.64459, + 2.64967 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1139 + }, + { + "timestamp_utc": "2025-12-11T01:40:10.167752+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T01:37:13Z\",\n \"avg_ns\": 7633997301,\n \"stddev_ns\": 5076031,\n \"avg_ts\": 16.767105,\n \"stddev_ts\": 0.011153,\n \"samples_ns\": [ 7628227553, 7635988527, 7637775823 ],\n \"samples_ts\": [ 16.7798, 16.7627, 16.7588 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T01:37:44Z\",\n \"avg_ns\": 48430790075,\n \"stddev_ns\": 692738499,\n \"avg_ts\": 2.643304,\n \"stddev_ts\": 0.037500,\n \"samples_ns\": [ 49230634526, 48022310911, 48039424789 ],\n \"samples_ts\": [ 2.60001, 2.66543, 2.66448 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T01:37:13Z", + "avg_ns": 7633997301, + "stddev_ns": 5076031, + "avg_ts": 16.767105, + "stddev_ts": 0.011153, + "samples_ns": [ + 7628227553, + 7635988527, + 7637775823 + ], + "samples_ts": [ + 16.7798, + 16.7627, + 16.7588 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T01:37:44Z", + "avg_ns": 48430790075, + "stddev_ns": 692738499, + "avg_ts": 2.643304, + "stddev_ts": 0.0375, + "samples_ns": [ + 49230634526, + 48022310911, + 48039424789 + ], + "samples_ts": [ + 2.60001, + 2.66543, + 2.66448 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1140 + }, + { + "timestamp_utc": "2025-12-11T01:50:25.681683+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T01:40:11Z\",\n \"avg_ns\": 7633639701,\n \"stddev_ns\": 7283283,\n \"avg_ts\": 16.767896,\n \"stddev_ts\": 0.015998,\n \"samples_ns\": [ 7641009262, 7633463965, 7626445876 ],\n \"samples_ts\": [ 16.7517, 16.7683, 16.7837 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T01:40:41Z\",\n \"avg_ns\": 194445498887,\n \"stddev_ns\": 3329484067,\n \"avg_ts\": 2.633215,\n \"stddev_ts\": 0.018408,\n \"samples_ns\": [ 196014076463, 193531729538, 193790690661 ],\n \"samples_ts\": [ 2.61206, 2.64556, 2.64203 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T01:40:11Z", + "avg_ns": 7633639701, + "stddev_ns": 7283283, + "avg_ts": 16.767896, + "stddev_ts": 0.015998, + "samples_ns": [ + 7641009262, + 7633463965, + 7626445876 + ], + "samples_ts": [ + 16.7517, + 16.7683, + 16.7837 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T01:40:41Z", + "avg_ns": 194445498887, + "stddev_ns": 3329484067, + "avg_ts": 2.633215, + "stddev_ts": 0.018408, + "samples_ns": [ + 196014076463, + 193531729538, + 193790690661 + ], + "samples_ts": [ + 2.61206, + 2.64556, + 2.64203 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1141 + }, + { + "timestamp_utc": "2025-12-11T01:54:54.988121+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T01:50:26Z\",\n \"avg_ns\": 30638983936,\n \"stddev_ns\": 36177203,\n \"avg_ts\": 16.710752,\n \"stddev_ts\": 0.019744,\n \"samples_ns\": [ 30661663770, 30658024577, 30597263462 ],\n \"samples_ts\": [ 16.6984, 16.7004, 16.7335 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T01:52:29Z\",\n \"avg_ns\": 48382254912,\n \"stddev_ns\": 483320706,\n \"avg_ts\": 2.645773,\n \"stddev_ts\": 0.026279,\n \"samples_ns\": [ 48940339423, 48100950731, 48105474583 ],\n \"samples_ts\": [ 2.61543, 2.66107, 2.66082 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T01:50:26Z", + "avg_ns": 30638983936, + "stddev_ns": 36177203, + "avg_ts": 16.710752, + "stddev_ts": 0.019744, + "samples_ns": [ + 30661663770, + 30658024577, + 30597263462 + ], + "samples_ts": [ + 16.6984, + 16.7004, + 16.7335 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T01:52:29Z", + "avg_ns": 48382254912, + "stddev_ns": 483320706, + "avg_ts": 2.645773, + "stddev_ts": 0.026279, + "samples_ns": [ + 48940339423, + 48100950731, + 48105474583 + ], + "samples_ts": [ + 2.61543, + 2.66107, + 2.66082 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1142 + }, + { + "timestamp_utc": "2025-12-11T02:06:38.509664+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T01:54:56Z\",\n \"avg_ns\": 30573452633,\n \"stddev_ns\": 19259208,\n \"avg_ts\": 16.746559,\n \"stddev_ts\": 0.010547,\n \"samples_ns\": [ 30594889232, 30567859884, 30557608783 ],\n \"samples_ts\": [ 16.7348, 16.7496, 16.7552 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T01:56:58Z\",\n \"avg_ns\": 193187950669,\n \"stddev_ns\": 99988687,\n \"avg_ts\": 2.650269,\n \"stddev_ts\": 0.001372,\n \"samples_ns\": [ 193095284665, 193293928423, 193174638919 ],\n \"samples_ts\": [ 2.65154, 2.64882, 2.65045 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T01:54:56Z", + "avg_ns": 30573452633, + "stddev_ns": 19259208, + "avg_ts": 16.746559, + "stddev_ts": 0.010547, + "samples_ns": [ + 30594889232, + 30567859884, + 30557608783 + ], + "samples_ts": [ + 16.7348, + 16.7496, + 16.7552 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T01:56:58Z", + "avg_ns": 193187950669, + "stddev_ns": 99988687, + "avg_ts": 2.650269, + "stddev_ts": 0.001372, + "samples_ns": [ + 193095284665, + 193293928423, + 193174638919 + ], + "samples_ts": [ + 2.65154, + 2.64882, + 2.65045 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1143 + }, + { + "timestamp_utc": "2025-12-11T02:09:34.921695+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T02:06:39Z\",\n \"avg_ns\": 7642833359,\n \"stddev_ns\": 11728919,\n \"avg_ts\": 16.747742,\n \"stddev_ts\": 0.025705,\n \"samples_ns\": [ 7654194617, 7643537001, 7630768459 ],\n \"samples_ts\": [ 16.7229, 16.7462, 16.7742 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T02:07:10Z\",\n \"avg_ns\": 48069709100,\n \"stddev_ns\": 51861608,\n \"avg_ts\": 2.662802,\n \"stddev_ts\": 0.002874,\n \"samples_ns\": [ 48114729848, 48081395620, 48013001833 ],\n \"samples_ts\": [ 2.66031, 2.66215, 2.66594 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T02:06:39Z", + "avg_ns": 7642833359, + "stddev_ns": 11728919, + "avg_ts": 16.747742, + "stddev_ts": 0.025705, + "samples_ns": [ + 7654194617, + 7643537001, + 7630768459 + ], + "samples_ts": [ + 16.7229, + 16.7462, + 16.7742 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T02:07:10Z", + "avg_ns": 48069709100, + "stddev_ns": 51861608, + "avg_ts": 2.662802, + "stddev_ts": 0.002874, + "samples_ns": [ + 48114729848, + 48081395620, + 48013001833 + ], + "samples_ts": [ + 2.66031, + 2.66215, + 2.66594 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1144 + }, + { + "timestamp_utc": "2025-12-11T02:19:47.531948+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T02:09:36Z\",\n \"avg_ns\": 7632553576,\n \"stddev_ns\": 3206856,\n \"avg_ts\": 16.770274,\n \"stddev_ts\": 0.007045,\n \"samples_ns\": [ 7636170132, 7631434006, 7630056590 ],\n \"samples_ts\": [ 16.7623, 16.7727, 16.7758 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T02:10:06Z\",\n \"avg_ns\": 193471997449,\n \"stddev_ns\": 38672581,\n \"avg_ts\": 2.646378,\n \"stddev_ts\": 0.000529,\n \"samples_ns\": [ 193448115302, 193516609824, 193451267223 ],\n \"samples_ts\": [ 2.6467, 2.64577, 2.64666 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T02:09:36Z", + "avg_ns": 7632553576, + "stddev_ns": 3206856, + "avg_ts": 16.770274, + "stddev_ts": 0.007045, + "samples_ns": [ + 7636170132, + 7631434006, + 7630056590 + ], + "samples_ts": [ + 16.7623, + 16.7727, + 16.7758 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T02:10:06Z", + "avg_ns": 193471997449, + "stddev_ns": 38672581, + "avg_ts": 2.646378, + "stddev_ts": 0.000529, + "samples_ns": [ + 193448115302, + 193516609824, + 193451267223 + ], + "samples_ts": [ + 2.6467, + 2.64577, + 2.64666 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1145 + }, + { + "timestamp_utc": "2025-12-11T02:24:16.221904+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T02:19:48Z\",\n \"avg_ns\": 30755652883,\n \"stddev_ns\": 16174325,\n \"avg_ts\": 16.647349,\n \"stddev_ts\": 0.008752,\n \"samples_ns\": [ 30774139683, 30744110030, 30748708936 ],\n \"samples_ts\": [ 16.6373, 16.6536, 16.6511 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T02:21:51Z\",\n \"avg_ns\": 47976494641,\n \"stddev_ns\": 34139327,\n \"avg_ts\": 2.667974,\n \"stddev_ts\": 0.001898,\n \"samples_ns\": [ 47955455163, 47958145661, 48015883101 ],\n \"samples_ts\": [ 2.66914, 2.66899, 2.66578 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T02:19:48Z", + "avg_ns": 30755652883, + "stddev_ns": 16174325, + "avg_ts": 16.647349, + "stddev_ts": 0.008752, + "samples_ns": [ + 30774139683, + 30744110030, + 30748708936 + ], + "samples_ts": [ + 16.6373, + 16.6536, + 16.6511 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T02:21:51Z", + "avg_ns": 47976494641, + "stddev_ns": 34139327, + "avg_ts": 2.667974, + "stddev_ts": 0.001898, + "samples_ns": [ + 47955455163, + 47958145661, + 48015883101 + ], + "samples_ts": [ + 2.66914, + 2.66899, + 2.66578 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1146 + }, + { + "timestamp_utc": "2025-12-11T02:35:59.950979+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T02:24:17Z\",\n \"avg_ns\": 30819536973,\n \"stddev_ns\": 9018194,\n \"avg_ts\": 16.612840,\n \"stddev_ts\": 0.004860,\n \"samples_ns\": [ 30825600032, 30823833178, 30809177711 ],\n \"samples_ts\": [ 16.6096, 16.6105, 16.6184 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T02:26:20Z\",\n \"avg_ns\": 192924108227,\n \"stddev_ns\": 48506150,\n \"avg_ts\": 2.653893,\n \"stddev_ts\": 0.000667,\n \"samples_ns\": [ 192971443895, 192874510728, 192926370058 ],\n \"samples_ts\": [ 2.65324, 2.65458, 2.65386 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T02:24:17Z", + "avg_ns": 30819536973, + "stddev_ns": 9018194, + "avg_ts": 16.61284, + "stddev_ts": 0.00486, + "samples_ns": [ + 30825600032, + 30823833178, + 30809177711 + ], + "samples_ts": [ + 16.6096, + 16.6105, + 16.6184 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T02:26:20Z", + "avg_ns": 192924108227, + "stddev_ns": 48506150, + "avg_ts": 2.653893, + "stddev_ts": 0.000667, + "samples_ns": [ + 192971443895, + 192874510728, + 192926370058 + ], + "samples_ts": [ + 2.65324, + 2.65458, + 2.65386 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1147 + }, + { + "timestamp_utc": "2025-12-11T02:38:56.146978+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T02:36:01Z\",\n \"avg_ns\": 7631657699,\n \"stddev_ns\": 3019975,\n \"avg_ts\": 16.772242,\n \"stddev_ts\": 0.006638,\n \"samples_ns\": [ 7632202234, 7628402503, 7634368360 ],\n \"samples_ts\": [ 16.771, 16.7794, 16.7663 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T02:36:31Z\",\n \"avg_ns\": 48014076847,\n \"stddev_ns\": 11888240,\n \"avg_ts\": 2.665885,\n \"stddev_ts\": 0.000660,\n \"samples_ns\": [ 48006505533, 48007946020, 48027778988 ],\n \"samples_ts\": [ 2.66631, 2.66623, 2.66512 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T02:36:01Z", + "avg_ns": 7631657699, + "stddev_ns": 3019975, + "avg_ts": 16.772242, + "stddev_ts": 0.006638, + "samples_ns": [ + 7632202234, + 7628402503, + 7634368360 + ], + "samples_ts": [ + 16.771, + 16.7794, + 16.7663 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T02:36:31Z", + "avg_ns": 48014076847, + "stddev_ns": 11888240, + "avg_ts": 2.665885, + "stddev_ts": 0.00066, + "samples_ns": [ + 48006505533, + 48007946020, + 48027778988 + ], + "samples_ts": [ + 2.66631, + 2.66623, + 2.66512 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1148 + }, + { + "timestamp_utc": "2025-12-11T02:49:07.507729+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T02:38:57Z\",\n \"avg_ns\": 7636872804,\n \"stddev_ns\": 7166388,\n \"avg_ts\": 16.760797,\n \"stddev_ts\": 0.015720,\n \"samples_ns\": [ 7632558298, 7632914838, 7645145276 ],\n \"samples_ts\": [ 16.7703, 16.7695, 16.7427 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T02:39:27Z\",\n \"avg_ns\": 193053031254,\n \"stddev_ns\": 37147177,\n \"avg_ts\": 2.652121,\n \"stddev_ts\": 0.000510,\n \"samples_ns\": [ 193012048525, 193084487323, 193062557914 ],\n \"samples_ts\": [ 2.65268, 2.65169, 2.65199 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T02:38:57Z", + "avg_ns": 7636872804, + "stddev_ns": 7166388, + "avg_ts": 16.760797, + "stddev_ts": 0.01572, + "samples_ns": [ + 7632558298, + 7632914838, + 7645145276 + ], + "samples_ts": [ + 16.7703, + 16.7695, + 16.7427 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T02:39:27Z", + "avg_ns": 193053031254, + "stddev_ns": 37147177, + "avg_ts": 2.652121, + "stddev_ts": 0.00051, + "samples_ns": [ + 193012048525, + 193084487323, + 193062557914 + ], + "samples_ts": [ + 2.65268, + 2.65169, + 2.65199 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1149 + }, + { + "timestamp_utc": "2025-12-11T02:53:38.764355+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T02:49:08Z\",\n \"avg_ns\": 31442352646,\n \"stddev_ns\": 9048778,\n \"avg_ts\": 16.283770,\n \"stddev_ts\": 0.004686,\n \"samples_ns\": [ 31433265566, 31451362631, 31442429741 ],\n \"samples_ts\": [ 16.2885, 16.2791, 16.2837 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T02:51:14Z\",\n \"avg_ns\": 47968698482,\n \"stddev_ns\": 54492095,\n \"avg_ts\": 2.668409,\n \"stddev_ts\": 0.003030,\n \"samples_ns\": [ 48030627893, 47928097718, 47947369837 ],\n \"samples_ts\": [ 2.66497, 2.67067, 2.66959 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T02:49:08Z", + "avg_ns": 31442352646, + "stddev_ns": 9048778, + "avg_ts": 16.28377, + "stddev_ts": 0.004686, + "samples_ns": [ + 31433265566, + 31451362631, + 31442429741 + ], + "samples_ts": [ + 16.2885, + 16.2791, + 16.2837 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T02:51:14Z", + "avg_ns": 47968698482, + "stddev_ns": 54492095, + "avg_ts": 2.668409, + "stddev_ts": 0.00303, + "samples_ns": [ + 48030627893, + 47928097718, + 47947369837 + ], + "samples_ts": [ + 2.66497, + 2.67067, + 2.66959 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1150 + }, + { + "timestamp_utc": "2025-12-11T03:05:25.717734+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T02:53:39Z\",\n \"avg_ns\": 31306520331,\n \"stddev_ns\": 5785420,\n \"avg_ts\": 16.354421,\n \"stddev_ts\": 0.003022,\n \"samples_ns\": [ 31302921343, 31313193894, 31303445756 ],\n \"samples_ts\": [ 16.3563, 16.3509, 16.356 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf\",\n \"model_type\": \"gemma3 4B Q8_0\",\n \"model_size\": 4123860992,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T02:55:45Z\",\n \"avg_ns\": 193341454172,\n \"stddev_ns\": 97847045,\n \"avg_ts\": 2.648165,\n \"stddev_ts\": 0.001340,\n \"samples_ns\": [ 193440329910, 193339359193, 193244673415 ],\n \"samples_ts\": [ 2.64681, 2.64819, 2.64949 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T02:53:39Z", + "avg_ns": 31306520331, + "stddev_ns": 5785420, + "avg_ts": 16.354421, + "stddev_ts": 0.003022, + "samples_ns": [ + 31302921343, + 31313193894, + 31303445756 + ], + "samples_ts": [ + 16.3563, + 16.3509, + 16.356 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_type": "gemma3 4B Q8_0", + "model_size": 4123860992, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T02:55:45Z", + "avg_ns": 193341454172, + "stddev_ns": 97847045, + "avg_ts": 2.648165, + "stddev_ts": 0.00134, + "samples_ns": [ + 193440329910, + 193339359193, + 193244673415 + ], + "samples_ts": [ + 2.64681, + 2.64819, + 2.64949 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q8_0.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q8_0", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1151 + }, + { + "timestamp_utc": "2025-12-11T03:12:26.488983+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T03:05:47Z\",\n \"avg_ns\": 50262873123,\n \"stddev_ns\": 1220963,\n \"avg_ts\": 2.546611,\n \"stddev_ts\": 0.000062,\n \"samples_ns\": [ 50263912551, 50263178299, 50261528519 ],\n \"samples_ts\": [ 2.54656, 2.5466, 2.54668 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T03:09:08Z\",\n \"avg_ns\": 65849611514,\n \"stddev_ns\": 13903673,\n \"avg_ts\": 1.943823,\n \"stddev_ts\": 0.000410,\n \"samples_ns\": [ 65865657522, 65841134231, 65842042789 ],\n \"samples_ts\": [ 1.94335, 1.94407, 1.94405 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T03:05:47Z", + "avg_ns": 50262873123, + "stddev_ns": 1220963, + "avg_ts": 2.546611, + "stddev_ts": 6.2e-05, + "samples_ns": [ + 50263912551, + 50263178299, + 50261528519 + ], + "samples_ts": [ + 2.54656, + 2.5466, + 2.54668 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T03:09:08Z", + "avg_ns": 65849611514, + "stddev_ns": 13903673, + "avg_ts": 1.943823, + "stddev_ts": 0.00041, + "samples_ns": [ + 65865657522, + 65841134231, + 65842042789 + ], + "samples_ts": [ + 1.94335, + 1.94407, + 1.94405 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1152 + }, + { + "timestamp_utc": "2025-12-11T03:29:08.283334+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T03:12:27Z\",\n \"avg_ns\": 50240595039,\n \"stddev_ns\": 518669,\n \"avg_ts\": 2.547741,\n \"stddev_ts\": 0.000024,\n \"samples_ns\": [ 50240416958, 50241125660, 50240242500 ],\n \"samples_ts\": [ 2.54775, 2.54771, 2.54776 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T03:15:48Z\",\n \"avg_ns\": 266350634154,\n \"stddev_ns\": 3410962,\n \"avg_ts\": 1.922278,\n \"stddev_ts\": 0.000024,\n \"samples_ns\": [ 266354395651, 266348053174, 266349453639 ],\n \"samples_ts\": [ 1.92225, 1.9223, 1.92229 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T03:12:27Z", + "avg_ns": 50240595039, + "stddev_ns": 518669, + "avg_ts": 2.547741, + "stddev_ts": 2.4e-05, + "samples_ns": [ + 50240416958, + 50241125660, + 50240242500 + ], + "samples_ts": [ + 2.54775, + 2.54771, + 2.54776 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T03:15:48Z", + "avg_ns": 266350634154, + "stddev_ns": 3410962, + "avg_ts": 1.922278, + "stddev_ts": 2.4e-05, + "samples_ns": [ + 266354395651, + 266348053174, + 266349453639 + ], + "samples_ts": [ + 1.92225, + 1.9223, + 1.92229 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1153 + }, + { + "timestamp_utc": "2025-12-11T03:45:53.789061+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T03:29:09Z\",\n \"avg_ns\": 201481637539,\n \"stddev_ns\": 453928,\n \"avg_ts\": 2.541175,\n \"stddev_ts\": 0.000001,\n \"samples_ns\": [ 201481612552, 201481714073, 201481585993 ],\n \"samples_ts\": [ 2.54117, 2.54117, 2.54118 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T03:42:35Z\",\n \"avg_ns\": 65962037292,\n \"stddev_ns\": 8225890,\n \"avg_ts\": 1.940510,\n \"stddev_ts\": 0.000242,\n \"samples_ns\": [ 65970061240, 65953630833, 65962419804 ],\n \"samples_ts\": [ 1.94027, 1.94076, 1.9405 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T03:29:09Z", + "avg_ns": 201481637539, + "stddev_ns": 453928, + "avg_ts": 2.541175, + "stddev_ts": 1e-06, + "samples_ns": [ + 201481612552, + 201481714073, + 201481585993 + ], + "samples_ts": [ + 2.54117, + 2.54117, + 2.54118 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T03:42:35Z", + "avg_ns": 65962037292, + "stddev_ns": 8225890, + "avg_ts": 1.94051, + "stddev_ts": 0.000242, + "samples_ns": [ + 65970061240, + 65953630833, + 65962419804 + ], + "samples_ts": [ + 1.94027, + 1.94076, + 1.9405 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1154 + }, + { + "timestamp_utc": "2025-12-11T04:12:38.115698+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T03:45:54Z\",\n \"avg_ns\": 201500254016,\n \"stddev_ns\": 1405445,\n \"avg_ts\": 2.540940,\n \"stddev_ts\": 0.000017,\n \"samples_ns\": [ 201500967397, 201498717456, 201501077196 ],\n \"samples_ts\": [ 2.54093, 2.54096, 2.54093 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T03:59:20Z\",\n \"avg_ns\": 265539531337,\n \"stddev_ns\": 884980,\n \"avg_ts\": 1.928150,\n \"stddev_ts\": 0.000004,\n \"samples_ns\": [ 265539595780, 265539998112, 265539000121 ],\n \"samples_ts\": [ 1.92815, 1.92815, 1.92815 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T03:45:54Z", + "avg_ns": 201500254016, + "stddev_ns": 1405445, + "avg_ts": 2.54094, + "stddev_ts": 1.7e-05, + "samples_ns": [ + 201500967397, + 201498717456, + 201501077196 + ], + "samples_ts": [ + 2.54093, + 2.54096, + 2.54093 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T03:59:20Z", + "avg_ns": 265539531337, + "stddev_ns": 884980, + "avg_ts": 1.92815, + "stddev_ts": 4e-06, + "samples_ns": [ + 265539595780, + 265539998112, + 265539000121 + ], + "samples_ts": [ + 1.92815, + 1.92815, + 1.92815 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1155 + }, + { + "timestamp_utc": "2025-12-11T04:19:19.058150+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T04:12:39Z\",\n \"avg_ns\": 50247178934,\n \"stddev_ns\": 431180,\n \"avg_ts\": 2.547407,\n \"stddev_ts\": 0.000015,\n \"samples_ns\": [ 50247257661, 50247423779, 50246855364 ],\n \"samples_ts\": [ 2.5474, 2.54739, 2.54742 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T04:16:00Z\",\n \"avg_ns\": 65956019854,\n \"stddev_ns\": 3685177,\n \"avg_ts\": 1.940687,\n \"stddev_ts\": 0.000108,\n \"samples_ns\": [ 65960263571, 65953626550, 65954169441 ],\n \"samples_ts\": [ 1.94056, 1.94076, 1.94074 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T04:12:39Z", + "avg_ns": 50247178934, + "stddev_ns": 431180, + "avg_ts": 2.547407, + "stddev_ts": 1.5e-05, + "samples_ns": [ + 50247257661, + 50247423779, + 50246855364 + ], + "samples_ts": [ + 2.5474, + 2.54739, + 2.54742 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T04:16:00Z", + "avg_ns": 65956019854, + "stddev_ns": 3685177, + "avg_ts": 1.940687, + "stddev_ts": 0.000108, + "samples_ns": [ + 65960263571, + 65953626550, + 65954169441 + ], + "samples_ts": [ + 1.94056, + 1.94076, + 1.94074 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1156 + }, + { + "timestamp_utc": "2025-12-11T04:36:01.081694+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T04:19:20Z\",\n \"avg_ns\": 50251180188,\n \"stddev_ns\": 128319,\n \"avg_ts\": 2.547204,\n \"stddev_ts\": 0.000007,\n \"samples_ns\": [ 50251273229, 50251233535, 50251033800 ],\n \"samples_ts\": [ 2.5472, 2.5472, 2.54721 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T04:22:41Z\",\n \"avg_ns\": 266388024599,\n \"stddev_ns\": 4962900,\n \"avg_ts\": 1.922008,\n \"stddev_ts\": 0.000036,\n \"samples_ns\": [ 266391886585, 266382426976, 266389760236 ],\n \"samples_ts\": [ 1.92198, 1.92205, 1.922 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T04:19:20Z", + "avg_ns": 50251180188, + "stddev_ns": 128319, + "avg_ts": 2.547204, + "stddev_ts": 7e-06, + "samples_ns": [ + 50251273229, + 50251233535, + 50251033800 + ], + "samples_ts": [ + 2.5472, + 2.5472, + 2.54721 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T04:22:41Z", + "avg_ns": 266388024599, + "stddev_ns": 4962900, + "avg_ts": 1.922008, + "stddev_ts": 3.6e-05, + "samples_ns": [ + 266391886585, + 266382426976, + 266389760236 + ], + "samples_ts": [ + 1.92198, + 1.92205, + 1.922 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1157 + }, + { + "timestamp_utc": "2025-12-11T04:52:47.280657+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T04:36:02Z\",\n \"avg_ns\": 201634377368,\n \"stddev_ns\": 908630,\n \"avg_ts\": 2.539250,\n \"stddev_ts\": 0.000010,\n \"samples_ns\": [ 201634438449, 201635134976, 201633558680 ],\n \"samples_ts\": [ 2.53925, 2.53924, 2.53926 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T04:49:28Z\",\n \"avg_ns\": 65983282468,\n \"stddev_ns\": 4798426,\n \"avg_ts\": 1.939885,\n \"stddev_ts\": 0.000141,\n \"samples_ns\": [ 65988587190, 65981967224, 65979292992 ],\n \"samples_ts\": [ 1.93973, 1.93992, 1.94 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T04:36:02Z", + "avg_ns": 201634377368, + "stddev_ns": 908630, + "avg_ts": 2.53925, + "stddev_ts": 1e-05, + "samples_ns": [ + 201634438449, + 201635134976, + 201633558680 + ], + "samples_ts": [ + 2.53925, + 2.53924, + 2.53926 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T04:49:28Z", + "avg_ns": 65983282468, + "stddev_ns": 4798426, + "avg_ts": 1.939885, + "stddev_ts": 0.000141, + "samples_ns": [ + 65988587190, + 65981967224, + 65979292992 + ], + "samples_ts": [ + 1.93973, + 1.93992, + 1.94 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1158 + }, + { + "timestamp_utc": "2025-12-11T05:19:33.619223+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T04:52:48Z\",\n \"avg_ns\": 201581373904,\n \"stddev_ns\": 689591,\n \"avg_ts\": 2.539917,\n \"stddev_ts\": 0.000009,\n \"samples_ns\": [ 201582155500, 201580851322, 201581114890 ],\n \"samples_ts\": [ 2.53991, 2.53992, 2.53992 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T05:06:14Z\",\n \"avg_ns\": 266091113868,\n \"stddev_ns\": 6334409,\n \"avg_ts\": 1.924153,\n \"stddev_ts\": 0.000045,\n \"samples_ns\": [ 266087809796, 266098369911, 266087161899 ],\n \"samples_ts\": [ 1.92418, 1.9241, 1.92418 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T04:52:48Z", + "avg_ns": 201581373904, + "stddev_ns": 689591, + "avg_ts": 2.539917, + "stddev_ts": 9e-06, + "samples_ns": [ + 201582155500, + 201580851322, + 201581114890 + ], + "samples_ts": [ + 2.53991, + 2.53992, + 2.53992 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T05:06:14Z", + "avg_ns": 266091113868, + "stddev_ns": 6334409, + "avg_ts": 1.924153, + "stddev_ts": 4.5e-05, + "samples_ns": [ + 266087809796, + 266098369911, + 266087161899 + ], + "samples_ts": [ + 1.92418, + 1.9241, + 1.92418 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1159 + }, + { + "timestamp_utc": "2025-12-11T05:26:13.892815+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T05:19:34Z\",\n \"avg_ns\": 50247724046,\n \"stddev_ns\": 522448,\n \"avg_ts\": 2.547379,\n \"stddev_ts\": 0.000021,\n \"samples_ns\": [ 50248080326, 50247823857, 50247267957 ],\n \"samples_ts\": [ 2.54736, 2.54737, 2.5474 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T05:22:55Z\",\n \"avg_ns\": 65851949878,\n \"stddev_ns\": 3766169,\n \"avg_ts\": 1.943754,\n \"stddev_ts\": 0.000111,\n \"samples_ns\": [ 65856274788, 65850086667, 65849488180 ],\n \"samples_ts\": [ 1.94363, 1.94381, 1.94383 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T05:19:34Z", + "avg_ns": 50247724046, + "stddev_ns": 522448, + "avg_ts": 2.547379, + "stddev_ts": 2.1e-05, + "samples_ns": [ + 50248080326, + 50247823857, + 50247267957 + ], + "samples_ts": [ + 2.54736, + 2.54737, + 2.5474 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T05:22:55Z", + "avg_ns": 65851949878, + "stddev_ns": 3766169, + "avg_ts": 1.943754, + "stddev_ts": 0.000111, + "samples_ns": [ + 65856274788, + 65850086667, + 65849488180 + ], + "samples_ts": [ + 1.94363, + 1.94381, + 1.94383 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1160 + }, + { + "timestamp_utc": "2025-12-11T05:42:53.901315+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T05:26:14Z\",\n \"avg_ns\": 50246384184,\n \"stddev_ns\": 656442,\n \"avg_ts\": 2.547447,\n \"stddev_ts\": 0.000029,\n \"samples_ns\": [ 50246069250, 50246035652, 50247047652 ],\n \"samples_ts\": [ 2.54746, 2.54746, 2.54741 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T05:29:35Z\",\n \"avg_ns\": 265767318009,\n \"stddev_ns\": 2557920,\n \"avg_ts\": 1.926497,\n \"stddev_ts\": 0.000018,\n \"samples_ns\": [ 265767782796, 265769504170, 265764667063 ],\n \"samples_ts\": [ 1.92649, 1.92648, 1.92652 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T05:26:14Z", + "avg_ns": 50246384184, + "stddev_ns": 656442, + "avg_ts": 2.547447, + "stddev_ts": 2.9e-05, + "samples_ns": [ + 50246069250, + 50246035652, + 50247047652 + ], + "samples_ts": [ + 2.54746, + 2.54746, + 2.54741 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T05:29:35Z", + "avg_ns": 265767318009, + "stddev_ns": 2557920, + "avg_ts": 1.926497, + "stddev_ts": 1.8e-05, + "samples_ns": [ + 265767782796, + 265769504170, + 265764667063 + ], + "samples_ts": [ + 1.92649, + 1.92648, + 1.92652 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1161 + }, + { + "timestamp_utc": "2025-12-11T05:59:43.792056+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T05:42:54Z\",\n \"avg_ns\": 202574444264,\n \"stddev_ns\": 2867625,\n \"avg_ts\": 2.527466,\n \"stddev_ts\": 0.000035,\n \"samples_ns\": [ 202574105079, 202571797047, 202577430667 ],\n \"samples_ts\": [ 2.52747, 2.5275, 2.52743 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T05:56:25Z\",\n \"avg_ns\": 65954134881,\n \"stddev_ns\": 1638403,\n \"avg_ts\": 1.940743,\n \"stddev_ts\": 0.000047,\n \"samples_ns\": [ 65955858062, 65953843770, 65952702813 ],\n \"samples_ts\": [ 1.94069, 1.94075, 1.94078 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T05:42:54Z", + "avg_ns": 202574444264, + "stddev_ns": 2867625, + "avg_ts": 2.527466, + "stddev_ts": 3.5e-05, + "samples_ns": [ + 202574105079, + 202571797047, + 202577430667 + ], + "samples_ts": [ + 2.52747, + 2.5275, + 2.52743 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T05:56:25Z", + "avg_ns": 65954134881, + "stddev_ns": 1638403, + "avg_ts": 1.940743, + "stddev_ts": 4.7e-05, + "samples_ns": [ + 65955858062, + 65953843770, + 65952702813 + ], + "samples_ts": [ + 1.94069, + 1.94075, + 1.94078 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1162 + }, + { + "timestamp_utc": "2025-12-11T06:26:33.685703+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T05:59:44Z\",\n \"avg_ns\": 202607043434,\n \"stddev_ns\": 1648051,\n \"avg_ts\": 2.527059,\n \"stddev_ts\": 0.000021,\n \"samples_ns\": [ 202608873793, 202606579268, 202605677241 ],\n \"samples_ts\": [ 2.52704, 2.52707, 2.52708 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T06:13:15Z\",\n \"avg_ns\": 265912219029,\n \"stddev_ns\": 5669278,\n \"avg_ts\": 1.925447,\n \"stddev_ts\": 0.000041,\n \"samples_ns\": [ 265918764342, 265908846975, 265909045770 ],\n \"samples_ts\": [ 1.9254, 1.92547, 1.92547 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T05:59:44Z", + "avg_ns": 202607043434, + "stddev_ns": 1648051, + "avg_ts": 2.527059, + "stddev_ts": 2.1e-05, + "samples_ns": [ + 202608873793, + 202606579268, + 202605677241 + ], + "samples_ts": [ + 2.52704, + 2.52707, + 2.52708 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T06:13:15Z", + "avg_ns": 265912219029, + "stddev_ns": 5669278, + "avg_ts": 1.925447, + "stddev_ts": 4.1e-05, + "samples_ns": [ + 265918764342, + 265908846975, + 265909045770 + ], + "samples_ts": [ + 1.9254, + 1.92547, + 1.92547 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1163 + }, + { + "timestamp_utc": "2025-12-11T06:33:14.151419+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T06:26:34Z\",\n \"avg_ns\": 50241566402,\n \"stddev_ns\": 210171,\n \"avg_ts\": 2.547691,\n \"stddev_ts\": 0.000011,\n \"samples_ns\": [ 50241773682, 50241572070, 50241353454 ],\n \"samples_ts\": [ 2.54768, 2.54769, 2.5477 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T06:29:55Z\",\n \"avg_ns\": 65930694553,\n \"stddev_ns\": 763348,\n \"avg_ts\": 1.941433,\n \"stddev_ts\": 0.000021,\n \"samples_ns\": [ 65931370647, 65930773572, 65929939441 ],\n \"samples_ts\": [ 1.94141, 1.94143, 1.94145 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T06:26:34Z", + "avg_ns": 50241566402, + "stddev_ns": 210171, + "avg_ts": 2.547691, + "stddev_ts": 1.1e-05, + "samples_ns": [ + 50241773682, + 50241572070, + 50241353454 + ], + "samples_ts": [ + 2.54768, + 2.54769, + 2.5477 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T06:29:55Z", + "avg_ns": 65930694553, + "stddev_ns": 763348, + "avg_ts": 1.941433, + "stddev_ts": 2.1e-05, + "samples_ns": [ + 65931370647, + 65930773572, + 65929939441 + ], + "samples_ts": [ + 1.94141, + 1.94143, + 1.94145 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1164 + }, + { + "timestamp_utc": "2025-12-11T06:49:54.376308+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T06:33:15Z\",\n \"avg_ns\": 50248990210,\n \"stddev_ns\": 240012,\n \"avg_ts\": 2.547315,\n \"stddev_ts\": 0.000004,\n \"samples_ns\": [ 50248959554, 50248923977, 50249087100 ],\n \"samples_ts\": [ 2.54732, 2.54732, 2.54731 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T06:36:36Z\",\n \"avg_ns\": 265825232554,\n \"stddev_ns\": 3153885,\n \"avg_ts\": 1.926078,\n \"stddev_ts\": 0.000023,\n \"samples_ns\": [ 265821631904, 265826560137, 265827505621 ],\n \"samples_ts\": [ 1.9261, 1.92607, 1.92606 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T06:33:15Z", + "avg_ns": 50248990210, + "stddev_ns": 240012, + "avg_ts": 2.547315, + "stddev_ts": 4e-06, + "samples_ns": [ + 50248959554, + 50248923977, + 50249087100 + ], + "samples_ts": [ + 2.54732, + 2.54732, + 2.54731 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T06:36:36Z", + "avg_ns": 265825232554, + "stddev_ns": 3153885, + "avg_ts": 1.926078, + "stddev_ts": 2.3e-05, + "samples_ns": [ + 265821631904, + 265826560137, + 265827505621 + ], + "samples_ts": [ + 1.9261, + 1.92607, + 1.92606 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1165 + }, + { + "timestamp_utc": "2025-12-11T07:06:39.634231+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T06:49:55Z\",\n \"avg_ns\": 201487603580,\n \"stddev_ns\": 1842272,\n \"avg_ts\": 2.541099,\n \"stddev_ts\": 0.000022,\n \"samples_ns\": [ 201487668847, 201485842423, 201489299472 ],\n \"samples_ts\": [ 2.5411, 2.54112, 2.54108 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T07:03:21Z\",\n \"avg_ns\": 65858469206,\n \"stddev_ns\": 3773718,\n \"avg_ts\": 1.943562,\n \"stddev_ts\": 0.000111,\n \"samples_ns\": [ 65862189969, 65858539157, 65854678494 ],\n \"samples_ts\": [ 1.94345, 1.94356, 1.94367 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T06:49:55Z", + "avg_ns": 201487603580, + "stddev_ns": 1842272, + "avg_ts": 2.541099, + "stddev_ts": 2.2e-05, + "samples_ns": [ + 201487668847, + 201485842423, + 201489299472 + ], + "samples_ts": [ + 2.5411, + 2.54112, + 2.54108 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T07:03:21Z", + "avg_ns": 65858469206, + "stddev_ns": 3773718, + "avg_ts": 1.943562, + "stddev_ts": 0.000111, + "samples_ns": [ + 65862189969, + 65858539157, + 65854678494 + ], + "samples_ts": [ + 1.94345, + 1.94356, + 1.94367 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1166 + }, + { + "timestamp_utc": "2025-12-11T07:33:25.739663+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T07:06:40Z\",\n \"avg_ns\": 201537651881,\n \"stddev_ns\": 1720622,\n \"avg_ts\": 2.540468,\n \"stddev_ts\": 0.000020,\n \"samples_ns\": [ 201536896668, 201536570137, 201539488840 ],\n \"samples_ts\": [ 2.54048, 2.54048, 2.54045 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T07:20:06Z\",\n \"avg_ns\": 266076915455,\n \"stddev_ns\": 1264792,\n \"avg_ts\": 1.924256,\n \"stddev_ts\": 0.000007,\n \"samples_ns\": [ 266076323352, 266076314509, 266078108506 ],\n \"samples_ts\": [ 1.92426, 1.92426, 1.92425 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T07:06:40Z", + "avg_ns": 201537651881, + "stddev_ns": 1720622, + "avg_ts": 2.540468, + "stddev_ts": 2e-05, + "samples_ns": [ + 201536896668, + 201536570137, + 201539488840 + ], + "samples_ts": [ + 2.54048, + 2.54048, + 2.54045 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T07:20:06Z", + "avg_ns": 266076915455, + "stddev_ns": 1264792, + "avg_ts": 1.924256, + "stddev_ts": 7e-06, + "samples_ns": [ + 266076323352, + 266076314509, + 266078108506 + ], + "samples_ts": [ + 1.92426, + 1.92426, + 1.92425 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1167 + }, + { + "timestamp_utc": "2025-12-11T07:40:06.216790+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T07:33:26Z\",\n \"avg_ns\": 50250056431,\n \"stddev_ns\": 983014,\n \"avg_ts\": 2.547261,\n \"stddev_ts\": 0.000047,\n \"samples_ns\": [ 50248992983, 50250455393, 50250720919 ],\n \"samples_ts\": [ 2.54731, 2.54724, 2.54723 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T07:36:47Z\",\n \"avg_ns\": 65926078049,\n \"stddev_ns\": 2984169,\n \"avg_ts\": 1.941569,\n \"stddev_ts\": 0.000088,\n \"samples_ns\": [ 65925615537, 65929255303, 65923363308 ],\n \"samples_ts\": [ 1.94158, 1.94147, 1.94165 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T07:33:26Z", + "avg_ns": 50250056431, + "stddev_ns": 983014, + "avg_ts": 2.547261, + "stddev_ts": 4.7e-05, + "samples_ns": [ + 50248992983, + 50250455393, + 50250720919 + ], + "samples_ts": [ + 2.54731, + 2.54724, + 2.54723 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T07:36:47Z", + "avg_ns": 65926078049, + "stddev_ns": 2984169, + "avg_ts": 1.941569, + "stddev_ts": 8.8e-05, + "samples_ns": [ + 65925615537, + 65929255303, + 65923363308 + ], + "samples_ts": [ + 1.94158, + 1.94147, + 1.94165 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1168 + }, + { + "timestamp_utc": "2025-12-11T07:56:47.059503+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T07:40:07Z\",\n \"avg_ns\": 50241992884,\n \"stddev_ns\": 439646,\n \"avg_ts\": 2.547670,\n \"stddev_ts\": 0.000015,\n \"samples_ns\": [ 50241889699, 50242335716, 50241753239 ],\n \"samples_ts\": [ 2.54767, 2.54765, 2.54768 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T07:43:28Z\",\n \"avg_ns\": 265946272927,\n \"stddev_ns\": 2796153,\n \"avg_ts\": 1.925201,\n \"stddev_ts\": 0.000020,\n \"samples_ns\": [ 265943421191, 265946609172, 265948788420 ],\n \"samples_ts\": [ 1.92522, 1.9252, 1.92518 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T07:40:07Z", + "avg_ns": 50241992884, + "stddev_ns": 439646, + "avg_ts": 2.54767, + "stddev_ts": 1.5e-05, + "samples_ns": [ + 50241889699, + 50242335716, + 50241753239 + ], + "samples_ts": [ + 2.54767, + 2.54765, + 2.54768 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T07:43:28Z", + "avg_ns": 265946272927, + "stddev_ns": 2796153, + "avg_ts": 1.925201, + "stddev_ts": 2e-05, + "samples_ns": [ + 265943421191, + 265946609172, + 265948788420 + ], + "samples_ts": [ + 1.92522, + 1.9252, + 1.92518 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1169 + }, + { + "timestamp_utc": "2025-12-11T08:13:32.677796+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T07:56:48Z\",\n \"avg_ns\": 201623291119,\n \"stddev_ns\": 2701514,\n \"avg_ts\": 2.539389,\n \"stddev_ts\": 0.000033,\n \"samples_ns\": [ 201624849426, 201624764453, 201620259480 ],\n \"samples_ts\": [ 2.53937, 2.53937, 2.53943 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T08:10:14Z\",\n \"avg_ns\": 65804056958,\n \"stddev_ns\": 23604465,\n \"avg_ts\": 1.945169,\n \"stddev_ts\": 0.000698,\n \"samples_ns\": [ 65831306229, 65790815932, 65790048715 ],\n \"samples_ts\": [ 1.94436, 1.94556, 1.94558 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T07:56:48Z", + "avg_ns": 201623291119, + "stddev_ns": 2701514, + "avg_ts": 2.539389, + "stddev_ts": 3.3e-05, + "samples_ns": [ + 201624849426, + 201624764453, + 201620259480 + ], + "samples_ts": [ + 2.53937, + 2.53937, + 2.53943 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T08:10:14Z", + "avg_ns": 65804056958, + "stddev_ns": 23604465, + "avg_ts": 1.945169, + "stddev_ts": 0.000698, + "samples_ns": [ + 65831306229, + 65790815932, + 65790048715 + ], + "samples_ts": [ + 1.94436, + 1.94556, + 1.94558 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1170 + }, + { + "timestamp_utc": "2025-12-11T08:40:18.478147+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T08:13:33Z\",\n \"avg_ns\": 201578158823,\n \"stddev_ns\": 669305,\n \"avg_ts\": 2.539958,\n \"stddev_ts\": 0.000003,\n \"samples_ns\": [ 201578209804, 201578340370, 201577926297 ],\n \"samples_ts\": [ 2.53996, 2.53996, 2.53996 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T08:27:00Z\",\n \"avg_ns\": 265914795795,\n \"stddev_ns\": 1840999,\n \"avg_ts\": 1.925429,\n \"stddev_ts\": 0.000013,\n \"samples_ns\": [ 265914453024, 265913224986, 265916709376 ],\n \"samples_ts\": [ 1.92543, 1.92544, 1.92541 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T08:13:33Z", + "avg_ns": 201578158823, + "stddev_ns": 669305, + "avg_ts": 2.539958, + "stddev_ts": 3e-06, + "samples_ns": [ + 201578209804, + 201578340370, + 201577926297 + ], + "samples_ts": [ + 2.53996, + 2.53996, + 2.53996 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T08:27:00Z", + "avg_ns": 265914795795, + "stddev_ns": 1840999, + "avg_ts": 1.925429, + "stddev_ts": 1.3e-05, + "samples_ns": [ + 265914453024, + 265913224986, + 265916709376 + ], + "samples_ts": [ + 1.92543, + 1.92544, + 1.92541 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1171 + }, + { + "timestamp_utc": "2025-12-11T08:46:59.009876+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T08:40:19Z\",\n \"avg_ns\": 50252765970,\n \"stddev_ns\": 1089200,\n \"avg_ts\": 2.547123,\n \"stddev_ts\": 0.000055,\n \"samples_ns\": [ 50251539724, 50253137022, 50253621164 ],\n \"samples_ts\": [ 2.54719, 2.5471, 2.54708 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T08:43:40Z\",\n \"avg_ns\": 65940722462,\n \"stddev_ns\": 1912051,\n \"avg_ts\": 1.941137,\n \"stddev_ts\": 0.000056,\n \"samples_ns\": [ 65942565557, 65938748207, 65940853622 ],\n \"samples_ts\": [ 1.94108, 1.9412, 1.94113 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T08:40:19Z", + "avg_ns": 50252765970, + "stddev_ns": 1089200, + "avg_ts": 2.547123, + "stddev_ts": 5.5e-05, + "samples_ns": [ + 50251539724, + 50253137022, + 50253621164 + ], + "samples_ts": [ + 2.54719, + 2.5471, + 2.54708 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T08:43:40Z", + "avg_ns": 65940722462, + "stddev_ns": 1912051, + "avg_ts": 1.941137, + "stddev_ts": 5.6e-05, + "samples_ns": [ + 65942565557, + 65938748207, + 65940853622 + ], + "samples_ts": [ + 1.94108, + 1.9412, + 1.94113 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1172 + }, + { + "timestamp_utc": "2025-12-11T09:03:39.654348+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T08:47:00Z\",\n \"avg_ns\": 50256585379,\n \"stddev_ns\": 538893,\n \"avg_ts\": 2.546930,\n \"stddev_ts\": 0.000025,\n \"samples_ns\": [ 50256433527, 50256189230, 50257133381 ],\n \"samples_ts\": [ 2.54694, 2.54695, 2.5469 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T08:50:21Z\",\n \"avg_ns\": 265968179655,\n \"stddev_ns\": 4304066,\n \"avg_ts\": 1.925042,\n \"stddev_ts\": 0.000031,\n \"samples_ns\": [ 265970803440, 265970523142, 265963212383 ],\n \"samples_ts\": [ 1.92502, 1.92503, 1.92508 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T08:47:00Z", + "avg_ns": 50256585379, + "stddev_ns": 538893, + "avg_ts": 2.54693, + "stddev_ts": 2.5e-05, + "samples_ns": [ + 50256433527, + 50256189230, + 50257133381 + ], + "samples_ts": [ + 2.54694, + 2.54695, + 2.5469 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T08:50:21Z", + "avg_ns": 265968179655, + "stddev_ns": 4304066, + "avg_ts": 1.925042, + "stddev_ts": 3.1e-05, + "samples_ns": [ + 265970803440, + 265970523142, + 265963212383 + ], + "samples_ts": [ + 1.92502, + 1.92503, + 1.92508 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1173 + }, + { + "timestamp_utc": "2025-12-11T09:20:29.658199+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T09:03:40Z\",\n \"avg_ns\": 202606680933,\n \"stddev_ns\": 1898490,\n \"avg_ts\": 2.527064,\n \"stddev_ts\": 0.000023,\n \"samples_ns\": [ 202608485428, 202606758232, 202604799140 ],\n \"samples_ts\": [ 2.52704, 2.52706, 2.52709 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T09:17:11Z\",\n \"avg_ns\": 65944345739,\n \"stddev_ns\": 7967570,\n \"avg_ts\": 1.941031,\n \"stddev_ts\": 0.000234,\n \"samples_ns\": [ 65953531896, 65940000024, 65939505299 ],\n \"samples_ts\": [ 1.94076, 1.94116, 1.94117 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T09:03:40Z", + "avg_ns": 202606680933, + "stddev_ns": 1898490, + "avg_ts": 2.527064, + "stddev_ts": 2.3e-05, + "samples_ns": [ + 202608485428, + 202606758232, + 202604799140 + ], + "samples_ts": [ + 2.52704, + 2.52706, + 2.52709 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T09:17:11Z", + "avg_ns": 65944345739, + "stddev_ns": 7967570, + "avg_ts": 1.941031, + "stddev_ts": 0.000234, + "samples_ns": [ + 65953531896, + 65940000024, + 65939505299 + ], + "samples_ts": [ + 1.94076, + 1.94116, + 1.94117 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1174 + }, + { + "timestamp_utc": "2025-12-11T09:47:18.564455+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T09:20:30Z\",\n \"avg_ns\": 202542807547,\n \"stddev_ns\": 2608543,\n \"avg_ts\": 2.527861,\n \"stddev_ts\": 0.000033,\n \"samples_ns\": [ 202543868378, 202539835723, 202544718540 ],\n \"samples_ts\": [ 2.52785, 2.5279, 2.52784 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T09:34:01Z\",\n \"avg_ns\": 265547333729,\n \"stddev_ns\": 1430433,\n \"avg_ts\": 1.928093,\n \"stddev_ts\": 0.000009,\n \"samples_ns\": [ 265548627292, 265547196916, 265546176981 ],\n \"samples_ts\": [ 1.92808, 1.92809, 1.9281 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T09:20:30Z", + "avg_ns": 202542807547, + "stddev_ns": 2608543, + "avg_ts": 2.527861, + "stddev_ts": 3.3e-05, + "samples_ns": [ + 202543868378, + 202539835723, + 202544718540 + ], + "samples_ts": [ + 2.52785, + 2.5279, + 2.52784 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T09:34:01Z", + "avg_ns": 265547333729, + "stddev_ns": 1430433, + "avg_ts": 1.928093, + "stddev_ts": 9e-06, + "samples_ns": [ + 265548627292, + 265547196916, + 265546176981 + ], + "samples_ts": [ + 1.92808, + 1.92809, + 1.9281 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1175 + }, + { + "timestamp_utc": "2025-12-11T09:53:58.962147+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T09:47:19Z\",\n \"avg_ns\": 50242758154,\n \"stddev_ns\": 598605,\n \"avg_ts\": 2.547631,\n \"stddev_ts\": 0.000030,\n \"samples_ns\": [ 50242398316, 50243449166, 50242426980 ],\n \"samples_ts\": [ 2.54765, 2.5476, 2.54765 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T09:50:40Z\",\n \"avg_ns\": 65892551227,\n \"stddev_ns\": 3248001,\n \"avg_ts\": 1.942556,\n \"stddev_ts\": 0.000095,\n \"samples_ns\": [ 65896139725, 65891628612, 65889885346 ],\n \"samples_ts\": [ 1.94245, 1.94258, 1.94264 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T09:47:19Z", + "avg_ns": 50242758154, + "stddev_ns": 598605, + "avg_ts": 2.547631, + "stddev_ts": 3e-05, + "samples_ns": [ + 50242398316, + 50243449166, + 50242426980 + ], + "samples_ts": [ + 2.54765, + 2.5476, + 2.54765 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T09:50:40Z", + "avg_ns": 65892551227, + "stddev_ns": 3248001, + "avg_ts": 1.942556, + "stddev_ts": 9.5e-05, + "samples_ns": [ + 65896139725, + 65891628612, + 65889885346 + ], + "samples_ts": [ + 1.94245, + 1.94258, + 1.94264 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1176 + }, + { + "timestamp_utc": "2025-12-11T10:10:39.294956+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T09:54:00Z\",\n \"avg_ns\": 50246902613,\n \"stddev_ns\": 389222,\n \"avg_ts\": 2.547421,\n \"stddev_ts\": 0.000016,\n \"samples_ns\": [ 50247222092, 50246585721, 50246900027 ],\n \"samples_ts\": [ 2.5474, 2.54744, 2.54742 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T09:57:21Z\",\n \"avg_ns\": 265871770831,\n \"stddev_ns\": 3160896,\n \"avg_ts\": 1.925740,\n \"stddev_ts\": 0.000022,\n \"samples_ns\": [ 265875166113, 265870975172, 265869171210 ],\n \"samples_ts\": [ 1.92572, 1.92575, 1.92576 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T09:54:00Z", + "avg_ns": 50246902613, + "stddev_ns": 389222, + "avg_ts": 2.547421, + "stddev_ts": 1.6e-05, + "samples_ns": [ + 50247222092, + 50246585721, + 50246900027 + ], + "samples_ts": [ + 2.5474, + 2.54744, + 2.54742 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T09:57:21Z", + "avg_ns": 265871770831, + "stddev_ns": 3160896, + "avg_ts": 1.92574, + "stddev_ts": 2.2e-05, + "samples_ns": [ + 265875166113, + 265870975172, + 265869171210 + ], + "samples_ts": [ + 1.92572, + 1.92575, + 1.92576 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1177 + }, + { + "timestamp_utc": "2025-12-11T10:27:24.392865+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T10:10:40Z\",\n \"avg_ns\": 201506255209,\n \"stddev_ns\": 968297,\n \"avg_ts\": 2.540864,\n \"stddev_ts\": 0.000012,\n \"samples_ns\": [ 201505138193, 201506771223, 201506856211 ],\n \"samples_ts\": [ 2.54088, 2.54086, 2.54086 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T10:24:06Z\",\n \"avg_ns\": 65785331567,\n \"stddev_ns\": 5108680,\n \"avg_ts\": 1.945723,\n \"stddev_ts\": 0.000151,\n \"samples_ns\": [ 65791215353, 65782336325, 65782443025 ],\n \"samples_ts\": [ 1.94555, 1.94581, 1.94581 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T10:10:40Z", + "avg_ns": 201506255209, + "stddev_ns": 968297, + "avg_ts": 2.540864, + "stddev_ts": 1.2e-05, + "samples_ns": [ + 201505138193, + 201506771223, + 201506856211 + ], + "samples_ts": [ + 2.54088, + 2.54086, + 2.54086 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T10:24:06Z", + "avg_ns": 65785331567, + "stddev_ns": 5108680, + "avg_ts": 1.945723, + "stddev_ts": 0.000151, + "samples_ns": [ + 65791215353, + 65782336325, + 65782443025 + ], + "samples_ts": [ + 1.94555, + 1.94581, + 1.94581 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1178 + }, + { + "timestamp_utc": "2025-12-11T10:54:10.189126+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T10:27:25Z\",\n \"avg_ns\": 201537510232,\n \"stddev_ns\": 1283323,\n \"avg_ts\": 2.540470,\n \"stddev_ts\": 0.000014,\n \"samples_ns\": [ 201538600293, 201537559063, 201536371342 ],\n \"samples_ts\": [ 2.54046, 2.54047, 2.54048 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T10:40:51Z\",\n \"avg_ns\": 265972649749,\n \"stddev_ns\": 4582885,\n \"avg_ts\": 1.925010,\n \"stddev_ts\": 0.000033,\n \"samples_ns\": [ 265977932902, 265970270963, 265969745382 ],\n \"samples_ts\": [ 1.92497, 1.92503, 1.92503 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T10:27:25Z", + "avg_ns": 201537510232, + "stddev_ns": 1283323, + "avg_ts": 2.54047, + "stddev_ts": 1.4e-05, + "samples_ns": [ + 201538600293, + 201537559063, + 201536371342 + ], + "samples_ts": [ + 2.54046, + 2.54047, + 2.54048 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T10:40:51Z", + "avg_ns": 265972649749, + "stddev_ns": 4582885, + "avg_ts": 1.92501, + "stddev_ts": 3.3e-05, + "samples_ns": [ + 265977932902, + 265970270963, + 265969745382 + ], + "samples_ts": [ + 1.92497, + 1.92503, + 1.92503 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1179 + }, + { + "timestamp_utc": "2025-12-11T11:00:50.710300+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T10:54:11Z\",\n \"avg_ns\": 50255108900,\n \"stddev_ns\": 592891,\n \"avg_ts\": 2.547005,\n \"stddev_ts\": 0.000025,\n \"samples_ns\": [ 50255050375, 50254639725, 50255636602 ],\n \"samples_ts\": [ 2.54701, 2.54703, 2.54698 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T10:57:32Z\",\n \"avg_ns\": 65933332412,\n \"stddev_ns\": 615449,\n \"avg_ts\": 1.941355,\n \"stddev_ts\": 0.000018,\n \"samples_ns\": [ 65933988210, 65932767394, 65933241632 ],\n \"samples_ts\": [ 1.94134, 1.94137, 1.94136 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T10:54:11Z", + "avg_ns": 50255108900, + "stddev_ns": 592891, + "avg_ts": 2.547005, + "stddev_ts": 2.5e-05, + "samples_ns": [ + 50255050375, + 50254639725, + 50255636602 + ], + "samples_ts": [ + 2.54701, + 2.54703, + 2.54698 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T10:57:32Z", + "avg_ns": 65933332412, + "stddev_ns": 615449, + "avg_ts": 1.941355, + "stddev_ts": 1.8e-05, + "samples_ns": [ + 65933988210, + 65932767394, + 65933241632 + ], + "samples_ts": [ + 1.94134, + 1.94137, + 1.94136 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1180 + }, + { + "timestamp_utc": "2025-12-11T11:17:30.192325+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T11:00:51Z\",\n \"avg_ns\": 50245197810,\n \"stddev_ns\": 332289,\n \"avg_ts\": 2.547507,\n \"stddev_ts\": 0.000017,\n \"samples_ns\": [ 50244874508, 50245538411, 50245180511 ],\n \"samples_ts\": [ 2.54752, 2.54749, 2.54751 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T11:04:12Z\",\n \"avg_ns\": 265594055012,\n \"stddev_ns\": 6934398,\n \"avg_ts\": 1.927754,\n \"stddev_ts\": 0.000050,\n \"samples_ns\": [ 265587779250, 265592948392, 265601437396 ],\n \"samples_ts\": [ 1.9278, 1.92776, 1.9277 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T11:00:51Z", + "avg_ns": 50245197810, + "stddev_ns": 332289, + "avg_ts": 2.547507, + "stddev_ts": 1.7e-05, + "samples_ns": [ + 50244874508, + 50245538411, + 50245180511 + ], + "samples_ts": [ + 2.54752, + 2.54749, + 2.54751 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T11:04:12Z", + "avg_ns": 265594055012, + "stddev_ns": 6934398, + "avg_ts": 1.927754, + "stddev_ts": 5e-05, + "samples_ns": [ + 265587779250, + 265592948392, + 265601437396 + ], + "samples_ts": [ + 1.9278, + 1.92776, + 1.9277 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1181 + }, + { + "timestamp_utc": "2025-12-11T11:34:16.250132+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T11:17:31Z\",\n \"avg_ns\": 201598526655,\n \"stddev_ns\": 1906628,\n \"avg_ts\": 2.539701,\n \"stddev_ts\": 0.000023,\n \"samples_ns\": [ 201599253286, 201599847395, 201596479286 ],\n \"samples_ts\": [ 2.53969, 2.53968, 2.53973 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T11:30:57Z\",\n \"avg_ns\": 65968963041,\n \"stddev_ns\": 2583915,\n \"avg_ts\": 1.940306,\n \"stddev_ts\": 0.000076,\n \"samples_ns\": [ 65971602759, 65968847563, 65966438801 ],\n \"samples_ts\": [ 1.94023, 1.94031, 1.94038 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T11:17:31Z", + "avg_ns": 201598526655, + "stddev_ns": 1906628, + "avg_ts": 2.539701, + "stddev_ts": 2.3e-05, + "samples_ns": [ + 201599253286, + 201599847395, + 201596479286 + ], + "samples_ts": [ + 2.53969, + 2.53968, + 2.53973 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T11:30:57Z", + "avg_ns": 65968963041, + "stddev_ns": 2583915, + "avg_ts": 1.940306, + "stddev_ts": 7.6e-05, + "samples_ns": [ + 65971602759, + 65968847563, + 65966438801 + ], + "samples_ts": [ + 1.94023, + 1.94031, + 1.94038 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1182 + }, + { + "timestamp_utc": "2025-12-11T12:01:02.386052+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T11:34:17Z\",\n \"avg_ns\": 201574385813,\n \"stddev_ns\": 2060041,\n \"avg_ts\": 2.540005,\n \"stddev_ts\": 0.000025,\n \"samples_ns\": [ 201576622500, 201573565042, 201572969899 ],\n \"samples_ts\": [ 2.53998, 2.54002, 2.54002 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T11:47:43Z\",\n \"avg_ns\": 266024413977,\n \"stddev_ns\": 6710998,\n \"avg_ts\": 1.924635,\n \"stddev_ts\": 0.000048,\n \"samples_ns\": [ 266016977903, 266026390163, 266029873867 ],\n \"samples_ts\": [ 1.92469, 1.92462, 1.9246 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T11:34:17Z", + "avg_ns": 201574385813, + "stddev_ns": 2060041, + "avg_ts": 2.540005, + "stddev_ts": 2.5e-05, + "samples_ns": [ + 201576622500, + 201573565042, + 201572969899 + ], + "samples_ts": [ + 2.53998, + 2.54002, + 2.54002 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T11:47:43Z", + "avg_ns": 266024413977, + "stddev_ns": 6710998, + "avg_ts": 1.924635, + "stddev_ts": 4.8e-05, + "samples_ns": [ + 266016977903, + 266026390163, + 266029873867 + ], + "samples_ts": [ + 1.92469, + 1.92462, + 1.9246 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1183 + }, + { + "timestamp_utc": "2025-12-11T12:07:42.978758+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T12:01:03Z\",\n \"avg_ns\": 50253315586,\n \"stddev_ns\": 143590,\n \"avg_ts\": 2.547096,\n \"stddev_ts\": 0.000007,\n \"samples_ns\": [ 50253459144, 50253171964, 50253315650 ],\n \"samples_ts\": [ 2.54709, 2.5471, 2.5471 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T12:04:24Z\",\n \"avg_ns\": 65943483753,\n \"stddev_ns\": 1002828,\n \"avg_ts\": 1.941056,\n \"stddev_ts\": 0.000030,\n \"samples_ns\": [ 65944641609, 65942918658, 65942890992 ],\n \"samples_ts\": [ 1.94102, 1.94107, 1.94107 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T12:01:03Z", + "avg_ns": 50253315586, + "stddev_ns": 143590, + "avg_ts": 2.547096, + "stddev_ts": 7e-06, + "samples_ns": [ + 50253459144, + 50253171964, + 50253315650 + ], + "samples_ts": [ + 2.54709, + 2.5471, + 2.5471 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T12:04:24Z", + "avg_ns": 65943483753, + "stddev_ns": 1002828, + "avg_ts": 1.941056, + "stddev_ts": 3e-05, + "samples_ns": [ + 65944641609, + 65942918658, + 65942890992 + ], + "samples_ts": [ + 1.94102, + 1.94107, + 1.94107 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1184 + }, + { + "timestamp_utc": "2025-12-11T12:24:24.102642+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T12:07:44Z\",\n \"avg_ns\": 50398138482,\n \"stddev_ns\": 247411620,\n \"avg_ts\": 2.539817,\n \"stddev_ts\": 0.012433,\n \"samples_ns\": [ 50683823310, 50256033550, 50254558588 ],\n \"samples_ts\": [ 2.52546, 2.54696, 2.54703 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T12:11:06Z\",\n \"avg_ns\": 265594389779,\n \"stddev_ns\": 3435133,\n \"avg_ts\": 1.927752,\n \"stddev_ts\": 0.000024,\n \"samples_ns\": [ 265597121284, 265595405858, 265590642197 ],\n \"samples_ts\": [ 1.92773, 1.92774, 1.92778 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T12:07:44Z", + "avg_ns": 50398138482, + "stddev_ns": 247411620, + "avg_ts": 2.539817, + "stddev_ts": 0.012433, + "samples_ns": [ + 50683823310, + 50256033550, + 50254558588 + ], + "samples_ts": [ + 2.52546, + 2.54696, + 2.54703 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T12:11:06Z", + "avg_ns": 265594389779, + "stddev_ns": 3435133, + "avg_ts": 1.927752, + "stddev_ts": 2.4e-05, + "samples_ns": [ + 265597121284, + 265595405858, + 265590642197 + ], + "samples_ts": [ + 1.92773, + 1.92774, + 1.92778 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1185 + }, + { + "timestamp_utc": "2025-12-11T12:41:13.819607+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T12:24:25Z\",\n \"avg_ns\": 202543723528,\n \"stddev_ns\": 1883672,\n \"avg_ts\": 2.527849,\n \"stddev_ts\": 0.000024,\n \"samples_ns\": [ 202544308900, 202541616668, 202545245016 ],\n \"samples_ts\": [ 2.52784, 2.52788, 2.52783 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T12:37:55Z\",\n \"avg_ns\": 65938675055,\n \"stddev_ns\": 2192902,\n \"avg_ts\": 1.941198,\n \"stddev_ts\": 0.000065,\n \"samples_ns\": [ 65940538913, 65939227493, 65936258759 ],\n \"samples_ts\": [ 1.94114, 1.94118, 1.94127 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T12:24:25Z", + "avg_ns": 202543723528, + "stddev_ns": 1883672, + "avg_ts": 2.527849, + "stddev_ts": 2.4e-05, + "samples_ns": [ + 202544308900, + 202541616668, + 202545245016 + ], + "samples_ts": [ + 2.52784, + 2.52788, + 2.52783 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T12:37:55Z", + "avg_ns": 65938675055, + "stddev_ns": 2192902, + "avg_ts": 1.941198, + "stddev_ts": 6.5e-05, + "samples_ns": [ + 65940538913, + 65939227493, + 65936258759 + ], + "samples_ts": [ + 1.94114, + 1.94118, + 1.94127 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1186 + }, + { + "timestamp_utc": "2025-12-11T13:08:05.344814+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T12:41:14Z\",\n \"avg_ns\": 202564165257,\n \"stddev_ns\": 1326527,\n \"avg_ts\": 2.527594,\n \"stddev_ts\": 0.000016,\n \"samples_ns\": [ 202565476280, 202562992033, 202564027459 ],\n \"samples_ts\": [ 2.52758, 2.52761, 2.5276 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T12:54:45Z\",\n \"avg_ns\": 266503174740,\n \"stddev_ns\": 7206588,\n \"avg_ts\": 1.921178,\n \"stddev_ts\": 0.000052,\n \"samples_ns\": [ 266506335000, 266508221185, 266494968037 ],\n \"samples_ts\": [ 1.92116, 1.92114, 1.92124 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T12:41:14Z", + "avg_ns": 202564165257, + "stddev_ns": 1326527, + "avg_ts": 2.527594, + "stddev_ts": 1.6e-05, + "samples_ns": [ + 202565476280, + 202562992033, + 202564027459 + ], + "samples_ts": [ + 2.52758, + 2.52761, + 2.5276 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T12:54:45Z", + "avg_ns": 266503174740, + "stddev_ns": 7206588, + "avg_ts": 1.921178, + "stddev_ts": 5.2e-05, + "samples_ns": [ + 266506335000, + 266508221185, + 266494968037 + ], + "samples_ts": [ + 1.92116, + 1.92114, + 1.92124 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1187 + }, + { + "timestamp_utc": "2025-12-11T13:11:31.377883+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T13:08:06Z\",\n \"avg_ns\": 25177183074,\n \"stddev_ns\": 1145310,\n \"avg_ts\": 5.083968,\n \"stddev_ts\": 0.000229,\n \"samples_ns\": [ 25178421381, 25176933389, 25176194453 ],\n \"samples_ts\": [ 5.08372, 5.08402, 5.08417 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T13:09:47Z\",\n \"avg_ns\": 34620345446,\n \"stddev_ns\": 1800559,\n \"avg_ts\": 3.697248,\n \"stddev_ts\": 0.000190,\n \"samples_ns\": [ 34622274330, 34618762677, 34619999333 ],\n \"samples_ts\": [ 3.69704, 3.69742, 3.69728 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T13:08:06Z", + "avg_ns": 25177183074, + "stddev_ns": 1145310, + "avg_ts": 5.083968, + "stddev_ts": 0.000229, + "samples_ns": [ + 25178421381, + 25176933389, + 25176194453 + ], + "samples_ts": [ + 5.08372, + 5.08402, + 5.08417 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T13:09:47Z", + "avg_ns": 34620345446, + "stddev_ns": 1800559, + "avg_ts": 3.697248, + "stddev_ts": 0.00019, + "samples_ns": [ + 34622274330, + 34618762677, + 34619999333 + ], + "samples_ts": [ + 3.69704, + 3.69742, + 3.69728 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1188 + }, + { + "timestamp_utc": "2025-12-11T13:20:12.669707+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T13:11:32Z\",\n \"avg_ns\": 25179584502,\n \"stddev_ns\": 1420373,\n \"avg_ts\": 5.083483,\n \"stddev_ts\": 0.000283,\n \"samples_ns\": [ 25181035027, 25179483009, 25178235472 ],\n \"samples_ts\": [ 5.08319, 5.0835, 5.08376 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T13:13:13Z\",\n \"avg_ns\": 139701298865,\n \"stddev_ns\": 1669240,\n \"avg_ts\": 3.664962,\n \"stddev_ts\": 0.000044,\n \"samples_ns\": [ 139701249102, 139702992431, 139699655062 ],\n \"samples_ts\": [ 3.66496, 3.66492, 3.66501 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T13:11:32Z", + "avg_ns": 25179584502, + "stddev_ns": 1420373, + "avg_ts": 5.083483, + "stddev_ts": 0.000283, + "samples_ns": [ + 25181035027, + 25179483009, + 25178235472 + ], + "samples_ts": [ + 5.08319, + 5.0835, + 5.08376 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T13:13:13Z", + "avg_ns": 139701298865, + "stddev_ns": 1669240, + "avg_ts": 3.664962, + "stddev_ts": 4.4e-05, + "samples_ns": [ + 139701249102, + 139702992431, + 139699655062 + ], + "samples_ts": [ + 3.66496, + 3.66492, + 3.66501 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1189 + }, + { + "timestamp_utc": "2025-12-11T13:28:41.693173+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T13:20:13Z\",\n \"avg_ns\": 100964963123,\n \"stddev_ns\": 1181267,\n \"avg_ts\": 5.071066,\n \"stddev_ts\": 0.000055,\n \"samples_ns\": [ 100966215428, 100964468364, 100964205579 ],\n \"samples_ts\": [ 5.071, 5.07109, 5.0711 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T13:26:57Z\",\n \"avg_ns\": 34557842725,\n \"stddev_ns\": 9148985,\n \"avg_ts\": 3.703935,\n \"stddev_ts\": 0.000980,\n \"samples_ns\": [ 34567047094, 34557723232, 34548757851 ],\n \"samples_ts\": [ 3.70295, 3.70395, 3.70491 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T13:20:13Z", + "avg_ns": 100964963123, + "stddev_ns": 1181267, + "avg_ts": 5.071066, + "stddev_ts": 5.5e-05, + "samples_ns": [ + 100966215428, + 100964468364, + 100964205579 + ], + "samples_ts": [ + 5.071, + 5.07109, + 5.0711 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T13:26:57Z", + "avg_ns": 34557842725, + "stddev_ns": 9148985, + "avg_ts": 3.703935, + "stddev_ts": 0.00098, + "samples_ns": [ + 34567047094, + 34557723232, + 34548757851 + ], + "samples_ts": [ + 3.70295, + 3.70395, + 3.70491 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1190 + }, + { + "timestamp_utc": "2025-12-11T13:42:25.578279+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T13:28:42Z\",\n \"avg_ns\": 100976598238,\n \"stddev_ns\": 2169073,\n \"avg_ts\": 5.070482,\n \"stddev_ts\": 0.000109,\n \"samples_ns\": [ 100974983081, 100975748008, 100979063625 ],\n \"samples_ts\": [ 5.07056, 5.07052, 5.07036 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T13:35:26Z\",\n \"avg_ns\": 139490799500,\n \"stddev_ns\": 1902020,\n \"avg_ts\": 3.670493,\n \"stddev_ts\": 0.000048,\n \"samples_ns\": [ 139492886884, 139490021933, 139489489685 ],\n \"samples_ts\": [ 3.67044, 3.67051, 3.67053 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T13:28:42Z", + "avg_ns": 100976598238, + "stddev_ns": 2169073, + "avg_ts": 5.070482, + "stddev_ts": 0.000109, + "samples_ns": [ + 100974983081, + 100975748008, + 100979063625 + ], + "samples_ts": [ + 5.07056, + 5.07052, + 5.07036 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T13:35:26Z", + "avg_ns": 139490799500, + "stddev_ns": 1902020, + "avg_ts": 3.670493, + "stddev_ts": 4.8e-05, + "samples_ns": [ + 139492886884, + 139490021933, + 139489489685 + ], + "samples_ts": [ + 3.67044, + 3.67051, + 3.67053 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1191 + }, + { + "timestamp_utc": "2025-12-11T13:45:51.395970+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T13:42:26Z\",\n \"avg_ns\": 25176536188,\n \"stddev_ns\": 438182,\n \"avg_ts\": 5.084099,\n \"stddev_ts\": 0.000076,\n \"samples_ns\": [ 25176449522, 25176948327, 25176210717 ],\n \"samples_ts\": [ 5.08412, 5.08402, 5.08416 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T13:44:07Z\",\n \"avg_ns\": 34549358546,\n \"stddev_ns\": 1634742,\n \"avg_ts\": 3.704844,\n \"stddev_ts\": 0.000174,\n \"samples_ns\": [ 34551223142, 34548252193, 34548600304 ],\n \"samples_ts\": [ 3.70464, 3.70496, 3.70493 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T13:42:26Z", + "avg_ns": 25176536188, + "stddev_ns": 438182, + "avg_ts": 5.084099, + "stddev_ts": 7.6e-05, + "samples_ns": [ + 25176449522, + 25176948327, + 25176210717 + ], + "samples_ts": [ + 5.08412, + 5.08402, + 5.08416 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T13:44:07Z", + "avg_ns": 34549358546, + "stddev_ns": 1634742, + "avg_ts": 3.704844, + "stddev_ts": 0.000174, + "samples_ns": [ + 34551223142, + 34548252193, + 34548600304 + ], + "samples_ts": [ + 3.70464, + 3.70496, + 3.70493 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1192 + }, + { + "timestamp_utc": "2025-12-11T13:54:32.838186+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T13:45:52Z\",\n \"avg_ns\": 25184910972,\n \"stddev_ns\": 2740077,\n \"avg_ts\": 5.082408,\n \"stddev_ts\": 0.000551,\n \"samples_ns\": [ 25188016129, 25183833955, 25182882834 ],\n \"samples_ts\": [ 5.08178, 5.08263, 5.08282 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T13:47:33Z\",\n \"avg_ns\": 139742579188,\n \"stddev_ns\": 4023667,\n \"avg_ts\": 3.663880,\n \"stddev_ts\": 0.000105,\n \"samples_ns\": [ 139738010811, 139745371177, 139744355578 ],\n \"samples_ts\": [ 3.664, 3.66381, 3.66383 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T13:45:52Z", + "avg_ns": 25184910972, + "stddev_ns": 2740077, + "avg_ts": 5.082408, + "stddev_ts": 0.000551, + "samples_ns": [ + 25188016129, + 25183833955, + 25182882834 + ], + "samples_ts": [ + 5.08178, + 5.08263, + 5.08282 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T13:47:33Z", + "avg_ns": 139742579188, + "stddev_ns": 4023667, + "avg_ts": 3.66388, + "stddev_ts": 0.000105, + "samples_ns": [ + 139738010811, + 139745371177, + 139744355578 + ], + "samples_ts": [ + 3.664, + 3.66381, + 3.66383 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1193 + }, + { + "timestamp_utc": "2025-12-11T14:03:02.747982+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T13:54:33Z\",\n \"avg_ns\": 101165111444,\n \"stddev_ns\": 1048348,\n \"avg_ts\": 5.061033,\n \"stddev_ts\": 0.000052,\n \"samples_ns\": [ 101166319730, 101164571076, 101164443526 ],\n \"samples_ts\": [ 5.06097, 5.06106, 5.06107 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T14:01:18Z\",\n \"avg_ns\": 34588573971,\n \"stddev_ns\": 4951882,\n \"avg_ts\": 3.700644,\n \"stddev_ts\": 0.000530,\n \"samples_ns\": [ 34594225064, 34586503412, 34584993437 ],\n \"samples_ts\": [ 3.70004, 3.70087, 3.70103 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T13:54:33Z", + "avg_ns": 101165111444, + "stddev_ns": 1048348, + "avg_ts": 5.061033, + "stddev_ts": 5.2e-05, + "samples_ns": [ + 101166319730, + 101164571076, + 101164443526 + ], + "samples_ts": [ + 5.06097, + 5.06106, + 5.06107 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T14:01:18Z", + "avg_ns": 34588573971, + "stddev_ns": 4951882, + "avg_ts": 3.700644, + "stddev_ts": 0.00053, + "samples_ns": [ + 34594225064, + 34586503412, + 34584993437 + ], + "samples_ts": [ + 3.70004, + 3.70087, + 3.70103 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1194 + }, + { + "timestamp_utc": "2025-12-11T14:16:47.948802+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T14:03:03Z\",\n \"avg_ns\": 101157118780,\n \"stddev_ns\": 768139,\n \"avg_ts\": 5.061433,\n \"stddev_ts\": 0.000031,\n \"samples_ns\": [ 101157776588, 101157041257, 101156538497 ],\n \"samples_ts\": [ 5.0614, 5.06144, 5.06146 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T14:09:48Z\",\n \"avg_ns\": 139691450398,\n \"stddev_ns\": 5629237,\n \"avg_ts\": 3.665221,\n \"stddev_ts\": 0.000148,\n \"samples_ns\": [ 139697221285, 139691155507, 139685974402 ],\n \"samples_ts\": [ 3.66507, 3.66523, 3.66536 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T14:03:03Z", + "avg_ns": 101157118780, + "stddev_ns": 768139, + "avg_ts": 5.061433, + "stddev_ts": 3.1e-05, + "samples_ns": [ + 101157776588, + 101157041257, + 101156538497 + ], + "samples_ts": [ + 5.0614, + 5.06144, + 5.06146 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T14:09:48Z", + "avg_ns": 139691450398, + "stddev_ns": 5629237, + "avg_ts": 3.665221, + "stddev_ts": 0.000148, + "samples_ns": [ + 139697221285, + 139691155507, + 139685974402 + ], + "samples_ts": [ + 3.66507, + 3.66523, + 3.66536 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1195 + }, + { + "timestamp_utc": "2025-12-11T14:20:13.992190+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T14:16:49Z\",\n \"avg_ns\": 25177130418,\n \"stddev_ns\": 655686,\n \"avg_ts\": 5.083979,\n \"stddev_ts\": 0.000124,\n \"samples_ns\": [ 25176459670, 25177671080, 25177260506 ],\n \"samples_ts\": [ 5.08411, 5.08387, 5.08395 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T14:18:29Z\",\n \"avg_ns\": 34623296431,\n \"stddev_ns\": 1364418,\n \"avg_ts\": 3.696933,\n \"stddev_ts\": 0.000143,\n \"samples_ns\": [ 34624493270, 34621850619, 34623545406 ],\n \"samples_ts\": [ 3.69681, 3.69709, 3.69691 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T14:16:49Z", + "avg_ns": 25177130418, + "stddev_ns": 655686, + "avg_ts": 5.083979, + "stddev_ts": 0.000124, + "samples_ns": [ + 25176459670, + 25177671080, + 25177260506 + ], + "samples_ts": [ + 5.08411, + 5.08387, + 5.08395 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T14:18:29Z", + "avg_ns": 34623296431, + "stddev_ns": 1364418, + "avg_ts": 3.696933, + "stddev_ts": 0.000143, + "samples_ns": [ + 34624493270, + 34621850619, + 34623545406 + ], + "samples_ts": [ + 3.69681, + 3.69709, + 3.69691 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1196 + }, + { + "timestamp_utc": "2025-12-11T14:28:55.507481+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T14:20:15Z\",\n \"avg_ns\": 25187716979,\n \"stddev_ns\": 1788129,\n \"avg_ts\": 5.081842,\n \"stddev_ts\": 0.000358,\n \"samples_ns\": [ 25186869800, 25186525408, 25189755731 ],\n \"samples_ts\": [ 5.08201, 5.08208, 5.08143 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T14:21:55Z\",\n \"avg_ns\": 139762560010,\n \"stddev_ns\": 3262081,\n \"avg_ts\": 3.663356,\n \"stddev_ts\": 0.000085,\n \"samples_ns\": [ 139759120613, 139763003259, 139765556159 ],\n \"samples_ts\": [ 3.66345, 3.66334, 3.66328 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T14:20:15Z", + "avg_ns": 25187716979, + "stddev_ns": 1788129, + "avg_ts": 5.081842, + "stddev_ts": 0.000358, + "samples_ns": [ + 25186869800, + 25186525408, + 25189755731 + ], + "samples_ts": [ + 5.08201, + 5.08208, + 5.08143 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T14:21:55Z", + "avg_ns": 139762560010, + "stddev_ns": 3262081, + "avg_ts": 3.663356, + "stddev_ts": 8.5e-05, + "samples_ns": [ + 139759120613, + 139763003259, + 139765556159 + ], + "samples_ts": [ + 3.66345, + 3.66334, + 3.66328 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1197 + }, + { + "timestamp_utc": "2025-12-11T14:37:29.014806+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T14:28:56Z\",\n \"avg_ns\": 101839325667,\n \"stddev_ns\": 8068062,\n \"avg_ts\": 5.027527,\n \"stddev_ts\": 0.000398,\n \"samples_ns\": [ 101836705344, 101848371604, 101832900054 ],\n \"samples_ts\": [ 5.02766, 5.02708, 5.02784 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T14:35:45Z\",\n \"avg_ns\": 34526202595,\n \"stddev_ns\": 1663470,\n \"avg_ts\": 3.707329,\n \"stddev_ts\": 0.000177,\n \"samples_ns\": [ 34528097691, 34525452701, 34525057394 ],\n \"samples_ts\": [ 3.70713, 3.70741, 3.70745 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T14:28:56Z", + "avg_ns": 101839325667, + "stddev_ns": 8068062, + "avg_ts": 5.027527, + "stddev_ts": 0.000398, + "samples_ns": [ + 101836705344, + 101848371604, + 101832900054 + ], + "samples_ts": [ + 5.02766, + 5.02708, + 5.02784 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T14:35:45Z", + "avg_ns": 34526202595, + "stddev_ns": 1663470, + "avg_ts": 3.707329, + "stddev_ts": 0.000177, + "samples_ns": [ + 34528097691, + 34525452701, + 34525057394 + ], + "samples_ts": [ + 3.70713, + 3.70741, + 3.70745 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1198 + }, + { + "timestamp_utc": "2025-12-11T14:51:16.063512+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T14:37:30Z\",\n \"avg_ns\": 101817202735,\n \"stddev_ns\": 4309784,\n \"avg_ts\": 5.028620,\n \"stddev_ts\": 0.000212,\n \"samples_ns\": [ 101820527167, 101812365347, 101818715693 ],\n \"samples_ts\": [ 5.02846, 5.02886, 5.02855 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T14:44:17Z\",\n \"avg_ns\": 139427105852,\n \"stddev_ns\": 7209505,\n \"avg_ts\": 3.672170,\n \"stddev_ts\": 0.000189,\n \"samples_ns\": [ 139435391160, 139422501265, 139423425133 ],\n \"samples_ts\": [ 3.67195, 3.67229, 3.67227 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T14:37:30Z", + "avg_ns": 101817202735, + "stddev_ns": 4309784, + "avg_ts": 5.02862, + "stddev_ts": 0.000212, + "samples_ns": [ + 101820527167, + 101812365347, + 101818715693 + ], + "samples_ts": [ + 5.02846, + 5.02886, + 5.02855 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T14:44:17Z", + "avg_ns": 139427105852, + "stddev_ns": 7209505, + "avg_ts": 3.67217, + "stddev_ts": 0.000189, + "samples_ns": [ + 139435391160, + 139422501265, + 139423425133 + ], + "samples_ts": [ + 3.67195, + 3.67229, + 3.67227 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1199 + }, + { + "timestamp_utc": "2025-12-11T14:54:42.777208+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T14:51:17Z\",\n \"avg_ns\": 25285639800,\n \"stddev_ns\": 166996390,\n \"avg_ts\": 5.062308,\n \"stddev_ts\": 0.033307,\n \"samples_ns\": [ 25478468555, 25189998738, 25188452107 ],\n \"samples_ts\": [ 5.02385, 5.08138, 5.08169 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T14:52:58Z\",\n \"avg_ns\": 34595128864,\n \"stddev_ns\": 9296389,\n \"avg_ts\": 3.699943,\n \"stddev_ts\": 0.000994,\n \"samples_ns\": [ 34584411271, 34600975673, 34599999649 ],\n \"samples_ts\": [ 3.70109, 3.69932, 3.69942 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T14:51:17Z", + "avg_ns": 25285639800, + "stddev_ns": 166996390, + "avg_ts": 5.062308, + "stddev_ts": 0.033307, + "samples_ns": [ + 25478468555, + 25189998738, + 25188452107 + ], + "samples_ts": [ + 5.02385, + 5.08138, + 5.08169 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T14:52:58Z", + "avg_ns": 34595128864, + "stddev_ns": 9296389, + "avg_ts": 3.699943, + "stddev_ts": 0.000994, + "samples_ns": [ + 34584411271, + 34600975673, + 34599999649 + ], + "samples_ts": [ + 3.70109, + 3.69932, + 3.69942 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1200 + }, + { + "timestamp_utc": "2025-12-11T15:03:24.634823+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T14:54:43Z\",\n \"avg_ns\": 25478192822,\n \"stddev_ns\": 3003324,\n \"avg_ts\": 5.023904,\n \"stddev_ts\": 0.000590,\n \"samples_ns\": [ 25481638967, 25476718884, 25476220617 ],\n \"samples_ts\": [ 5.02322, 5.02419, 5.02429 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T14:56:25Z\",\n \"avg_ns\": 139476535940,\n \"stddev_ns\": 68650498,\n \"avg_ts\": 3.670869,\n \"stddev_ts\": 0.001806,\n \"samples_ns\": [ 139555434308, 139430450710, 139443722803 ],\n \"samples_ts\": [ 3.66879, 3.67208, 3.67173 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T14:54:43Z", + "avg_ns": 25478192822, + "stddev_ns": 3003324, + "avg_ts": 5.023904, + "stddev_ts": 0.00059, + "samples_ns": [ + 25481638967, + 25476718884, + 25476220617 + ], + "samples_ts": [ + 5.02322, + 5.02419, + 5.02429 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T14:56:25Z", + "avg_ns": 139476535940, + "stddev_ns": 68650498, + "avg_ts": 3.670869, + "stddev_ts": 0.001806, + "samples_ns": [ + 139555434308, + 139430450710, + 139443722803 + ], + "samples_ts": [ + 3.66879, + 3.67208, + 3.67173 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1201 + }, + { + "timestamp_utc": "2025-12-11T15:11:54.014247+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T15:03:25Z\",\n \"avg_ns\": 101011451633,\n \"stddev_ns\": 3788497,\n \"avg_ts\": 5.068732,\n \"stddev_ts\": 0.000189,\n \"samples_ns\": [ 101015103392, 101011662709, 101007588800 ],\n \"samples_ts\": [ 5.06855, 5.06872, 5.06893 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T15:10:09Z\",\n \"avg_ns\": 34613902154,\n \"stddev_ns\": 4565722,\n \"avg_ts\": 3.697936,\n \"stddev_ts\": 0.000487,\n \"samples_ns\": [ 34617805797, 34615013402, 34608887264 ],\n \"samples_ts\": [ 3.69752, 3.69782, 3.69847 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T15:03:25Z", + "avg_ns": 101011451633, + "stddev_ns": 3788497, + "avg_ts": 5.068732, + "stddev_ts": 0.000189, + "samples_ns": [ + 101015103392, + 101011662709, + 101007588800 + ], + "samples_ts": [ + 5.06855, + 5.06872, + 5.06893 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T15:10:09Z", + "avg_ns": 34613902154, + "stddev_ns": 4565722, + "avg_ts": 3.697936, + "stddev_ts": 0.000487, + "samples_ns": [ + 34617805797, + 34615013402, + 34608887264 + ], + "samples_ts": [ + 3.69752, + 3.69782, + 3.69847 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1202 + }, + { + "timestamp_utc": "2025-12-11T15:25:38.371290+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T15:11:55Z\",\n \"avg_ns\": 100999966595,\n \"stddev_ns\": 1250041,\n \"avg_ts\": 5.069309,\n \"stddev_ts\": 0.000059,\n \"samples_ns\": [ 101001206060, 100999803373, 100998890354 ],\n \"samples_ts\": [ 5.06925, 5.06932, 5.06936 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T15:18:39Z\",\n \"avg_ns\": 139612377773,\n \"stddev_ns\": 7055220,\n \"avg_ts\": 3.667297,\n \"stddev_ts\": 0.000185,\n \"samples_ns\": [ 139619426119, 139612351820, 139605355382 ],\n \"samples_ts\": [ 3.66711, 3.6673, 3.66748 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T15:11:55Z", + "avg_ns": 100999966595, + "stddev_ns": 1250041, + "avg_ts": 5.069309, + "stddev_ts": 5.9e-05, + "samples_ns": [ + 101001206060, + 100999803373, + 100998890354 + ], + "samples_ts": [ + 5.06925, + 5.06932, + 5.06936 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T15:18:39Z", + "avg_ns": 139612377773, + "stddev_ns": 7055220, + "avg_ts": 3.667297, + "stddev_ts": 0.000185, + "samples_ns": [ + 139619426119, + 139612351820, + 139605355382 + ], + "samples_ts": [ + 3.66711, + 3.6673, + 3.66748 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1203 + }, + { + "timestamp_utc": "2025-12-11T15:29:04.283729+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T15:25:39Z\",\n \"avg_ns\": 25184254973,\n \"stddev_ns\": 256858,\n \"avg_ts\": 5.082541,\n \"stddev_ts\": 0.000052,\n \"samples_ns\": [ 25183967470, 25184335613, 25184461836 ],\n \"samples_ts\": [ 5.0826, 5.08252, 5.0825 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T15:27:20Z\",\n \"avg_ns\": 34549274052,\n \"stddev_ns\": 940562,\n \"avg_ts\": 3.704854,\n \"stddev_ts\": 0.000099,\n \"samples_ns\": [ 34550015292, 34548241593, 34549565272 ],\n \"samples_ts\": [ 3.70477, 3.70496, 3.70482 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T15:25:39Z", + "avg_ns": 25184254973, + "stddev_ns": 256858, + "avg_ts": 5.082541, + "stddev_ts": 5.2e-05, + "samples_ns": [ + 25183967470, + 25184335613, + 25184461836 + ], + "samples_ts": [ + 5.0826, + 5.08252, + 5.0825 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T15:27:20Z", + "avg_ns": 34549274052, + "stddev_ns": 940562, + "avg_ts": 3.704854, + "stddev_ts": 9.9e-05, + "samples_ns": [ + 34550015292, + 34548241593, + 34549565272 + ], + "samples_ts": [ + 3.70477, + 3.70496, + 3.70482 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1204 + }, + { + "timestamp_utc": "2025-12-11T15:37:44.789377+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T15:29:05Z\",\n \"avg_ns\": 25180844427,\n \"stddev_ns\": 3011569,\n \"avg_ts\": 5.083229,\n \"stddev_ts\": 0.000607,\n \"samples_ns\": [ 25184290785, 25179490481, 25178752016 ],\n \"samples_ts\": [ 5.08253, 5.0835, 5.08365 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T15:30:46Z\",\n \"avg_ns\": 139421360881,\n \"stddev_ns\": 5869413,\n \"avg_ts\": 3.672321,\n \"stddev_ts\": 0.000154,\n \"samples_ns\": [ 139414599199, 139424599121, 139424884324 ],\n \"samples_ts\": [ 3.6725, 3.67224, 3.67223 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T15:29:05Z", + "avg_ns": 25180844427, + "stddev_ns": 3011569, + "avg_ts": 5.083229, + "stddev_ts": 0.000607, + "samples_ns": [ + 25184290785, + 25179490481, + 25178752016 + ], + "samples_ts": [ + 5.08253, + 5.0835, + 5.08365 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T15:30:46Z", + "avg_ns": 139421360881, + "stddev_ns": 5869413, + "avg_ts": 3.672321, + "stddev_ts": 0.000154, + "samples_ns": [ + 139414599199, + 139424599121, + 139424884324 + ], + "samples_ts": [ + 3.6725, + 3.67224, + 3.67223 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1205 + }, + { + "timestamp_utc": "2025-12-11T15:46:14.809050+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T15:37:45Z\",\n \"avg_ns\": 101186851951,\n \"stddev_ns\": 3121139,\n \"avg_ts\": 5.059946,\n \"stddev_ts\": 0.000155,\n \"samples_ns\": [ 101185420478, 101190414351, 101184721025 ],\n \"samples_ts\": [ 5.06002, 5.05977, 5.06005 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T15:44:30Z\",\n \"avg_ns\": 34581800782,\n \"stddev_ns\": 1809556,\n \"avg_ts\": 3.701369,\n \"stddev_ts\": 0.000193,\n \"samples_ns\": [ 34583875822, 34580866085, 34580660440 ],\n \"samples_ts\": [ 3.70115, 3.70147, 3.70149 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T15:37:45Z", + "avg_ns": 101186851951, + "stddev_ns": 3121139, + "avg_ts": 5.059946, + "stddev_ts": 0.000155, + "samples_ns": [ + 101185420478, + 101190414351, + 101184721025 + ], + "samples_ts": [ + 5.06002, + 5.05977, + 5.06005 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T15:44:30Z", + "avg_ns": 34581800782, + "stddev_ns": 1809556, + "avg_ts": 3.701369, + "stddev_ts": 0.000193, + "samples_ns": [ + 34583875822, + 34580866085, + 34580660440 + ], + "samples_ts": [ + 3.70115, + 3.70147, + 3.70149 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1206 + }, + { + "timestamp_utc": "2025-12-11T16:00:00.180701+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T15:46:15Z\",\n \"avg_ns\": 101147581125,\n \"stddev_ns\": 76736,\n \"avg_ts\": 5.061910,\n \"stddev_ts\": 0.000004,\n \"samples_ns\": [ 101147639210, 101147610031, 101147494134 ],\n \"samples_ts\": [ 5.06191, 5.06191, 5.06191 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T15:53:00Z\",\n \"avg_ns\": 139749786120,\n \"stddev_ns\": 33659748,\n \"avg_ts\": 3.663691,\n \"stddev_ts\": 0.000882,\n \"samples_ns\": [ 139788647646, 139729794116, 139730916598 ],\n \"samples_ts\": [ 3.66267, 3.66421, 3.66419 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T15:46:15Z", + "avg_ns": 101147581125, + "stddev_ns": 76736, + "avg_ts": 5.06191, + "stddev_ts": 4e-06, + "samples_ns": [ + 101147639210, + 101147610031, + 101147494134 + ], + "samples_ts": [ + 5.06191, + 5.06191, + 5.06191 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T15:53:00Z", + "avg_ns": 139749786120, + "stddev_ns": 33659748, + "avg_ts": 3.663691, + "stddev_ts": 0.000882, + "samples_ns": [ + 139788647646, + 139729794116, + 139730916598 + ], + "samples_ts": [ + 3.66267, + 3.66421, + 3.66419 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1207 + }, + { + "timestamp_utc": "2025-12-11T16:03:26.671220+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T16:00:01Z\",\n \"avg_ns\": 25313580685,\n \"stddev_ns\": 156187248,\n \"avg_ts\": 5.056702,\n \"stddev_ts\": 0.031135,\n \"samples_ns\": [ 25485266424, 25275564707, 25179910926 ],\n \"samples_ts\": [ 5.02251, 5.06418, 5.08342 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T16:01:42Z\",\n \"avg_ns\": 34561676432,\n \"stddev_ns\": 1867595,\n \"avg_ts\": 3.703524,\n \"stddev_ts\": 0.000200,\n \"samples_ns\": [ 34563404001, 34559694819, 34561930476 ],\n \"samples_ts\": [ 3.70334, 3.70374, 3.7035 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T16:00:01Z", + "avg_ns": 25313580685, + "stddev_ns": 156187248, + "avg_ts": 5.056702, + "stddev_ts": 0.031135, + "samples_ns": [ + 25485266424, + 25275564707, + 25179910926 + ], + "samples_ts": [ + 5.02251, + 5.06418, + 5.08342 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T16:01:42Z", + "avg_ns": 34561676432, + "stddev_ns": 1867595, + "avg_ts": 3.703524, + "stddev_ts": 0.0002, + "samples_ns": [ + 34563404001, + 34559694819, + 34561930476 + ], + "samples_ts": [ + 3.70334, + 3.70374, + 3.7035 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1208 + }, + { + "timestamp_utc": "2025-12-11T16:12:07.317939+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T16:03:27Z\",\n \"avg_ns\": 25180343453,\n \"stddev_ns\": 1210759,\n \"avg_ts\": 5.083330,\n \"stddev_ts\": 0.000244,\n \"samples_ns\": [ 25181159648, 25180918365, 25178952346 ],\n \"samples_ts\": [ 5.08317, 5.08321, 5.08361 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T16:05:08Z\",\n \"avg_ns\": 139451254277,\n \"stddev_ns\": 5180545,\n \"avg_ts\": 3.671534,\n \"stddev_ts\": 0.000136,\n \"samples_ns\": [ 139457147357, 139447417932, 139449197542 ],\n \"samples_ts\": [ 3.67138, 3.67163, 3.67159 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T16:03:27Z", + "avg_ns": 25180343453, + "stddev_ns": 1210759, + "avg_ts": 5.08333, + "stddev_ts": 0.000244, + "samples_ns": [ + 25181159648, + 25180918365, + 25178952346 + ], + "samples_ts": [ + 5.08317, + 5.08321, + 5.08361 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T16:05:08Z", + "avg_ns": 139451254277, + "stddev_ns": 5180545, + "avg_ts": 3.671534, + "stddev_ts": 0.000136, + "samples_ns": [ + 139457147357, + 139447417932, + 139449197542 + ], + "samples_ts": [ + 3.67138, + 3.67163, + 3.67159 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1209 + }, + { + "timestamp_utc": "2025-12-11T16:20:39.768559+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T16:12:08Z\",\n \"avg_ns\": 101841375060,\n \"stddev_ns\": 1779177,\n \"avg_ts\": 5.027426,\n \"stddev_ts\": 0.000088,\n \"samples_ns\": [ 101842825603, 101839389861, 101841909716 ],\n \"samples_ts\": [ 5.02735, 5.02752, 5.0274 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T16:18:55Z\",\n \"avg_ns\": 34501829596,\n \"stddev_ns\": 3081194,\n \"avg_ts\": 3.709948,\n \"stddev_ts\": 0.000331,\n \"samples_ns\": [ 34505375570, 34499886865, 34500226354 ],\n \"samples_ts\": [ 3.70957, 3.71016, 3.71012 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T16:12:08Z", + "avg_ns": 101841375060, + "stddev_ns": 1779177, + "avg_ts": 5.027426, + "stddev_ts": 8.8e-05, + "samples_ns": [ + 101842825603, + 101839389861, + 101841909716 + ], + "samples_ts": [ + 5.02735, + 5.02752, + 5.0274 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T16:18:55Z", + "avg_ns": 34501829596, + "stddev_ns": 3081194, + "avg_ts": 3.709948, + "stddev_ts": 0.000331, + "samples_ns": [ + 34505375570, + 34499886865, + 34500226354 + ], + "samples_ts": [ + 3.70957, + 3.71016, + 3.71012 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1210 + }, + { + "timestamp_utc": "2025-12-11T16:34:27.946674+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T16:20:40Z\",\n \"avg_ns\": 101830386556,\n \"stddev_ns\": 4358729,\n \"avg_ts\": 5.027969,\n \"stddev_ts\": 0.000214,\n \"samples_ns\": [ 101826156149, 101834819648, 101830183873 ],\n \"samples_ts\": [ 5.02818, 5.02775, 5.02798 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T16:27:28Z\",\n \"avg_ns\": 139752937583,\n \"stddev_ns\": 3224444,\n \"avg_ts\": 3.663608,\n \"stddev_ts\": 0.000085,\n \"samples_ns\": [ 139756288251, 139752668239, 139749856259 ],\n \"samples_ts\": [ 3.66352, 3.66362, 3.66369 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T16:20:40Z", + "avg_ns": 101830386556, + "stddev_ns": 4358729, + "avg_ts": 5.027969, + "stddev_ts": 0.000214, + "samples_ns": [ + 101826156149, + 101834819648, + 101830183873 + ], + "samples_ts": [ + 5.02818, + 5.02775, + 5.02798 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T16:27:28Z", + "avg_ns": 139752937583, + "stddev_ns": 3224444, + "avg_ts": 3.663608, + "stddev_ts": 8.5e-05, + "samples_ns": [ + 139756288251, + 139752668239, + 139749856259 + ], + "samples_ts": [ + 3.66352, + 3.66362, + 3.66369 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1211 + }, + { + "timestamp_utc": "2025-12-11T16:37:53.870564+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T16:34:29Z\",\n \"avg_ns\": 25182659335,\n \"stddev_ns\": 1933220,\n \"avg_ts\": 5.082863,\n \"stddev_ts\": 0.000389,\n \"samples_ns\": [ 25181775946, 25184869328, 25181332732 ],\n \"samples_ts\": [ 5.08304, 5.08242, 5.08313 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T16:36:09Z\",\n \"avg_ns\": 34533781009,\n \"stddev_ns\": 2397639,\n \"avg_ts\": 3.706516,\n \"stddev_ts\": 0.000256,\n \"samples_ns\": [ 34534697968, 34535569528, 34531075533 ],\n \"samples_ts\": [ 3.70642, 3.70632, 3.70681 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T16:34:29Z", + "avg_ns": 25182659335, + "stddev_ns": 1933220, + "avg_ts": 5.082863, + "stddev_ts": 0.000389, + "samples_ns": [ + 25181775946, + 25184869328, + 25181332732 + ], + "samples_ts": [ + 5.08304, + 5.08242, + 5.08313 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T16:36:09Z", + "avg_ns": 34533781009, + "stddev_ns": 2397639, + "avg_ts": 3.706516, + "stddev_ts": 0.000256, + "samples_ns": [ + 34534697968, + 34535569528, + 34531075533 + ], + "samples_ts": [ + 3.70642, + 3.70632, + 3.70681 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1212 + }, + { + "timestamp_utc": "2025-12-11T16:46:36.238541+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T16:37:55Z\",\n \"avg_ns\": 25188183601,\n \"stddev_ns\": 2498055,\n \"avg_ts\": 5.081748,\n \"stddev_ts\": 0.000503,\n \"samples_ns\": [ 25190869853, 25187736653, 25185944298 ],\n \"samples_ts\": [ 5.08121, 5.08184, 5.0822 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T16:39:35Z\",\n \"avg_ns\": 139993388034,\n \"stddev_ns\": 4308136,\n \"avg_ts\": 3.657316,\n \"stddev_ts\": 0.000112,\n \"samples_ns\": [ 139988796886, 139994111647, 139997255571 ],\n \"samples_ts\": [ 3.65744, 3.6573, 3.65721 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T16:37:55Z", + "avg_ns": 25188183601, + "stddev_ns": 2498055, + "avg_ts": 5.081748, + "stddev_ts": 0.000503, + "samples_ns": [ + 25190869853, + 25187736653, + 25185944298 + ], + "samples_ts": [ + 5.08121, + 5.08184, + 5.0822 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T16:39:35Z", + "avg_ns": 139993388034, + "stddev_ns": 4308136, + "avg_ts": 3.657316, + "stddev_ts": 0.000112, + "samples_ns": [ + 139988796886, + 139994111647, + 139997255571 + ], + "samples_ts": [ + 3.65744, + 3.6573, + 3.65721 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1213 + }, + { + "timestamp_utc": "2025-12-11T16:55:05.439426+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T16:46:37Z\",\n \"avg_ns\": 101007010328,\n \"stddev_ns\": 4612305,\n \"avg_ts\": 5.068955,\n \"stddev_ts\": 0.000231,\n \"samples_ns\": [ 101008955948, 101010319256, 101001755781 ],\n \"samples_ts\": [ 5.06886, 5.06879, 5.06922 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T16:53:21Z\",\n \"avg_ns\": 34539547222,\n \"stddev_ns\": 3733434,\n \"avg_ts\": 3.705897,\n \"stddev_ts\": 0.000401,\n \"samples_ns\": [ 34543857331, 34537468004, 34537316331 ],\n \"samples_ts\": [ 3.70543, 3.70612, 3.70614 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T16:46:37Z", + "avg_ns": 101007010328, + "stddev_ns": 4612305, + "avg_ts": 5.068955, + "stddev_ts": 0.000231, + "samples_ns": [ + 101008955948, + 101010319256, + 101001755781 + ], + "samples_ts": [ + 5.06886, + 5.06879, + 5.06922 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T16:53:21Z", + "avg_ns": 34539547222, + "stddev_ns": 3733434, + "avg_ts": 3.705897, + "stddev_ts": 0.000401, + "samples_ns": [ + 34543857331, + 34537468004, + 34537316331 + ], + "samples_ts": [ + 3.70543, + 3.70612, + 3.70614 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1214 + }, + { + "timestamp_utc": "2025-12-11T17:08:49.307546+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T16:55:06Z\",\n \"avg_ns\": 101002167653,\n \"stddev_ns\": 1704498,\n \"avg_ts\": 5.069198,\n \"stddev_ts\": 0.000083,\n \"samples_ns\": [ 101001143178, 101001295641, 101004064142 ],\n \"samples_ts\": [ 5.06925, 5.06924, 5.0691 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T17:01:50Z\",\n \"avg_ns\": 139417248011,\n \"stddev_ns\": 6598655,\n \"avg_ts\": 3.672429,\n \"stddev_ts\": 0.000173,\n \"samples_ns\": [ 139421381171, 139409663174, 139420699690 ],\n \"samples_ts\": [ 3.67232, 3.67263, 3.67234 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T16:55:06Z", + "avg_ns": 101002167653, + "stddev_ns": 1704498, + "avg_ts": 5.069198, + "stddev_ts": 8.3e-05, + "samples_ns": [ + 101001143178, + 101001295641, + 101004064142 + ], + "samples_ts": [ + 5.06925, + 5.06924, + 5.0691 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T17:01:50Z", + "avg_ns": 139417248011, + "stddev_ns": 6598655, + "avg_ts": 3.672429, + "stddev_ts": 0.000173, + "samples_ns": [ + 139421381171, + 139409663174, + 139420699690 + ], + "samples_ts": [ + 3.67232, + 3.67263, + 3.67234 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1215 + }, + { + "timestamp_utc": "2025-12-11T17:12:15.198242+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T17:08:50Z\",\n \"avg_ns\": 25179076290,\n \"stddev_ns\": 478268,\n \"avg_ts\": 5.083586,\n \"stddev_ts\": 0.000085,\n \"samples_ns\": [ 25179532467, 25178698836, 25178997569 ],\n \"samples_ts\": [ 5.08349, 5.08366, 5.0836 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T17:10:31Z\",\n \"avg_ns\": 34536786151,\n \"stddev_ns\": 2276702,\n \"avg_ts\": 3.706193,\n \"stddev_ts\": 0.000244,\n \"samples_ns\": [ 34539412473, 34535371991, 34535573989 ],\n \"samples_ts\": [ 3.70591, 3.70634, 3.70632 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T17:08:50Z", + "avg_ns": 25179076290, + "stddev_ns": 478268, + "avg_ts": 5.083586, + "stddev_ts": 8.5e-05, + "samples_ns": [ + 25179532467, + 25178698836, + 25178997569 + ], + "samples_ts": [ + 5.08349, + 5.08366, + 5.0836 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T17:10:31Z", + "avg_ns": 34536786151, + "stddev_ns": 2276702, + "avg_ts": 3.706193, + "stddev_ts": 0.000244, + "samples_ns": [ + 34539412473, + 34535371991, + 34535573989 + ], + "samples_ts": [ + 3.70591, + 3.70634, + 3.70632 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1216 + }, + { + "timestamp_utc": "2025-12-11T17:20:57.480344+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T17:12:16Z\",\n \"avg_ns\": 25188390678,\n \"stddev_ns\": 386590,\n \"avg_ts\": 5.081706,\n \"stddev_ts\": 0.000071,\n \"samples_ns\": [ 25188705777, 25188009974, 25188456284 ],\n \"samples_ts\": [ 5.08164, 5.08178, 5.08169 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T17:13:57Z\",\n \"avg_ns\": 139976454896,\n \"stddev_ns\": 8690912,\n \"avg_ts\": 3.657758,\n \"stddev_ts\": 0.000227,\n \"samples_ns\": [ 139984056131, 139978316250, 139966992308 ],\n \"samples_ts\": [ 3.65756, 3.65771, 3.65801 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T17:12:16Z", + "avg_ns": 25188390678, + "stddev_ns": 386590, + "avg_ts": 5.081706, + "stddev_ts": 7.1e-05, + "samples_ns": [ + 25188705777, + 25188009974, + 25188456284 + ], + "samples_ts": [ + 5.08164, + 5.08178, + 5.08169 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T17:13:57Z", + "avg_ns": 139976454896, + "stddev_ns": 8690912, + "avg_ts": 3.657758, + "stddev_ts": 0.000227, + "samples_ns": [ + 139984056131, + 139978316250, + 139966992308 + ], + "samples_ts": [ + 3.65756, + 3.65771, + 3.65801 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1217 + }, + { + "timestamp_utc": "2025-12-11T17:29:27.295734+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T17:20:58Z\",\n \"avg_ns\": 101149270792,\n \"stddev_ns\": 2177306,\n \"avg_ts\": 5.061826,\n \"stddev_ts\": 0.000107,\n \"samples_ns\": [ 101151730635, 101148056593, 101148025150 ],\n \"samples_ts\": [ 5.0617, 5.06189, 5.06189 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T17:27:43Z\",\n \"avg_ns\": 34552816449,\n \"stddev_ns\": 6944458,\n \"avg_ts\": 3.704474,\n \"stddev_ts\": 0.000744,\n \"samples_ns\": [ 34560810447, 34548341541, 34549297361 ],\n \"samples_ts\": [ 3.70362, 3.70495, 3.70485 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T17:20:58Z", + "avg_ns": 101149270792, + "stddev_ns": 2177306, + "avg_ts": 5.061826, + "stddev_ts": 0.000107, + "samples_ns": [ + 101151730635, + 101148056593, + 101148025150 + ], + "samples_ts": [ + 5.0617, + 5.06189, + 5.06189 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T17:27:43Z", + "avg_ns": 34552816449, + "stddev_ns": 6944458, + "avg_ts": 3.704474, + "stddev_ts": 0.000744, + "samples_ns": [ + 34560810447, + 34548341541, + 34549297361 + ], + "samples_ts": [ + 3.70362, + 3.70495, + 3.70485 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1218 + }, + { + "timestamp_utc": "2025-12-11T17:43:12.363013+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T17:29:28Z\",\n \"avg_ns\": 101158899817,\n \"stddev_ns\": 938942,\n \"avg_ts\": 5.061344,\n \"stddev_ts\": 0.000041,\n \"samples_ns\": [ 101158493327, 101158357832, 101159848294 ],\n \"samples_ts\": [ 5.06136, 5.06137, 5.0613 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T17:36:13Z\",\n \"avg_ns\": 139621420933,\n \"stddev_ns\": 1764713,\n \"avg_ts\": 3.667059,\n \"stddev_ts\": 0.000045,\n \"samples_ns\": [ 139619787693, 139621250646, 139623224461 ],\n \"samples_ts\": [ 3.6671, 3.66706, 3.66701 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T17:29:28Z", + "avg_ns": 101158899817, + "stddev_ns": 938942, + "avg_ts": 5.061344, + "stddev_ts": 4.1e-05, + "samples_ns": [ + 101158493327, + 101158357832, + 101159848294 + ], + "samples_ts": [ + 5.06136, + 5.06137, + 5.0613 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T17:36:13Z", + "avg_ns": 139621420933, + "stddev_ns": 1764713, + "avg_ts": 3.667059, + "stddev_ts": 4.5e-05, + "samples_ns": [ + 139619787693, + 139621250646, + 139623224461 + ], + "samples_ts": [ + 3.6671, + 3.66706, + 3.66701 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1219 + }, + { + "timestamp_utc": "2025-12-11T17:46:38.181082+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T17:43:13Z\",\n \"avg_ns\": 25178992719,\n \"stddev_ns\": 2374887,\n \"avg_ts\": 5.083603,\n \"stddev_ts\": 0.000478,\n \"samples_ns\": [ 25181696554, 25178003944, 25177277660 ],\n \"samples_ts\": [ 5.08306, 5.0838, 5.08395 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T17:44:54Z\",\n \"avg_ns\": 34523260212,\n \"stddev_ns\": 1092763,\n \"avg_ts\": 3.707645,\n \"stddev_ts\": 0.000116,\n \"samples_ns\": [ 34524250390, 34523416483, 34522113764 ],\n \"samples_ts\": [ 3.70754, 3.70763, 3.70777 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T17:43:13Z", + "avg_ns": 25178992719, + "stddev_ns": 2374887, + "avg_ts": 5.083603, + "stddev_ts": 0.000478, + "samples_ns": [ + 25181696554, + 25178003944, + 25177277660 + ], + "samples_ts": [ + 5.08306, + 5.0838, + 5.08395 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T17:44:54Z", + "avg_ns": 34523260212, + "stddev_ns": 1092763, + "avg_ts": 3.707645, + "stddev_ts": 0.000116, + "samples_ns": [ + 34524250390, + 34523416483, + 34522113764 + ], + "samples_ts": [ + 3.70754, + 3.70763, + 3.70777 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1220 + }, + { + "timestamp_utc": "2025-12-11T17:55:20.859451+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T17:46:39Z\",\n \"avg_ns\": 25179087221,\n \"stddev_ns\": 3430894,\n \"avg_ts\": 5.083584,\n \"stddev_ts\": 0.000693,\n \"samples_ns\": [ 25183048871, 25177112414, 25177100378 ],\n \"samples_ts\": [ 5.08278, 5.08398, 5.08398 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T17:48:20Z\",\n \"avg_ns\": 140119398630,\n \"stddev_ns\": 2428364,\n \"avg_ts\": 3.654027,\n \"stddev_ts\": 0.000063,\n \"samples_ns\": [ 140116602045, 140120973813, 140120620032 ],\n \"samples_ts\": [ 3.6541, 3.65399, 3.65399 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T17:46:39Z", + "avg_ns": 25179087221, + "stddev_ns": 3430894, + "avg_ts": 5.083584, + "stddev_ts": 0.000693, + "samples_ns": [ + 25183048871, + 25177112414, + 25177100378 + ], + "samples_ts": [ + 5.08278, + 5.08398, + 5.08398 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T17:48:20Z", + "avg_ns": 140119398630, + "stddev_ns": 2428364, + "avg_ts": 3.654027, + "stddev_ts": 6.3e-05, + "samples_ns": [ + 140116602045, + 140120973813, + 140120620032 + ], + "samples_ts": [ + 3.6541, + 3.65399, + 3.65399 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1221 + }, + { + "timestamp_utc": "2025-12-11T18:03:53.452263+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T17:55:22Z\",\n \"avg_ns\": 101821864809,\n \"stddev_ns\": 2314421,\n \"avg_ts\": 5.028390,\n \"stddev_ts\": 0.000112,\n \"samples_ns\": [ 101824450548, 101820199958, 101820943923 ],\n \"samples_ts\": [ 5.02826, 5.02847, 5.02844 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T18:02:09Z\",\n \"avg_ns\": 34581446638,\n \"stddev_ns\": 3995269,\n \"avg_ts\": 3.701407,\n \"stddev_ts\": 0.000427,\n \"samples_ns\": [ 34585640708, 34577695841, 34581003366 ],\n \"samples_ts\": [ 3.70096, 3.70181, 3.70145 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T17:55:22Z", + "avg_ns": 101821864809, + "stddev_ns": 2314421, + "avg_ts": 5.02839, + "stddev_ts": 0.000112, + "samples_ns": [ + 101824450548, + 101820199958, + 101820943923 + ], + "samples_ts": [ + 5.02826, + 5.02847, + 5.02844 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T18:02:09Z", + "avg_ns": 34581446638, + "stddev_ns": 3995269, + "avg_ts": 3.701407, + "stddev_ts": 0.000427, + "samples_ns": [ + 34585640708, + 34577695841, + 34581003366 + ], + "samples_ts": [ + 3.70096, + 3.70181, + 3.70145 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1222 + }, + { + "timestamp_utc": "2025-12-11T18:17:40.643379+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T18:03:54Z\",\n \"avg_ns\": 101823034398,\n \"stddev_ns\": 2590808,\n \"avg_ts\": 5.028332,\n \"stddev_ts\": 0.000127,\n \"samples_ns\": [ 101825868007, 101822384708, 101820850480 ],\n \"samples_ts\": [ 5.02819, 5.02836, 5.02844 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T18:10:42Z\",\n \"avg_ns\": 139425056844,\n \"stddev_ns\": 7676280,\n \"avg_ts\": 3.672224,\n \"stddev_ts\": 0.000202,\n \"samples_ns\": [ 139433783901, 139421983558, 139419403074 ],\n \"samples_ts\": [ 3.67199, 3.6723, 3.67237 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T18:03:54Z", + "avg_ns": 101823034398, + "stddev_ns": 2590808, + "avg_ts": 5.028332, + "stddev_ts": 0.000127, + "samples_ns": [ + 101825868007, + 101822384708, + 101820850480 + ], + "samples_ts": [ + 5.02819, + 5.02836, + 5.02844 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T18:10:42Z", + "avg_ns": 139425056844, + "stddev_ns": 7676280, + "avg_ts": 3.672224, + "stddev_ts": 0.000202, + "samples_ns": [ + 139433783901, + 139421983558, + 139419403074 + ], + "samples_ts": [ + 3.67199, + 3.6723, + 3.67237 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1223 + }, + { + "timestamp_utc": "2025-12-11T18:20:06.022423+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T18:17:41Z\",\n \"avg_ns\": 17021583351,\n \"stddev_ns\": 6799910,\n \"avg_ts\": 7.519865,\n \"stddev_ts\": 0.003003,\n \"samples_ns\": [ 17022488940, 17027882570, 17014378545 ],\n \"samples_ts\": [ 7.51946, 7.51708, 7.52305 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T18:18:49Z\",\n \"avg_ns\": 25270001580,\n \"stddev_ns\": 5932131,\n \"avg_ts\": 5.065295,\n \"stddev_ts\": 0.001189,\n \"samples_ns\": [ 25273947269, 25263182235, 25272875237 ],\n \"samples_ts\": [ 5.0645, 5.06666, 5.06472 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T18:17:41Z", + "avg_ns": 17021583351, + "stddev_ns": 6799910, + "avg_ts": 7.519865, + "stddev_ts": 0.003003, + "samples_ns": [ + 17022488940, + 17027882570, + 17014378545 + ], + "samples_ts": [ + 7.51946, + 7.51708, + 7.52305 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T18:18:49Z", + "avg_ns": 25270001580, + "stddev_ns": 5932131, + "avg_ts": 5.065295, + "stddev_ts": 0.001189, + "samples_ns": [ + 25273947269, + 25263182235, + 25272875237 + ], + "samples_ts": [ + 5.0645, + 5.06666, + 5.06472 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1224 + }, + { + "timestamp_utc": "2025-12-11T18:26:22.708918+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T18:20:07Z\",\n \"avg_ns\": 17023055440,\n \"stddev_ns\": 910873,\n \"avg_ts\": 7.519214,\n \"stddev_ts\": 0.000394,\n \"samples_ns\": [ 17023459754, 17022032892, 17023673676 ],\n \"samples_ts\": [ 7.51904, 7.51967, 7.51894 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T18:21:15Z\",\n \"avg_ns\": 102364016378,\n \"stddev_ns\": 74856803,\n \"avg_ts\": 5.001759,\n \"stddev_ts\": 0.003657,\n \"samples_ns\": [ 102341263741, 102303177225, 102447608170 ],\n \"samples_ts\": [ 5.00287, 5.00473, 4.99768 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T18:20:07Z", + "avg_ns": 17023055440, + "stddev_ns": 910873, + "avg_ts": 7.519214, + "stddev_ts": 0.000394, + "samples_ns": [ + 17023459754, + 17022032892, + 17023673676 + ], + "samples_ts": [ + 7.51904, + 7.51967, + 7.51894 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T18:21:15Z", + "avg_ns": 102364016378, + "stddev_ns": 74856803, + "avg_ts": 5.001759, + "stddev_ts": 0.003657, + "samples_ns": [ + 102341263741, + 102303177225, + 102447608170 + ], + "samples_ts": [ + 5.00287, + 5.00473, + 4.99768 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1225 + }, + { + "timestamp_utc": "2025-12-11T18:32:12.513508+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T18:26:23Z\",\n \"avg_ns\": 68025168952,\n \"stddev_ns\": 215234788,\n \"avg_ts\": 7.526676,\n \"stddev_ts\": 0.023806,\n \"samples_ns\": [ 68248337412, 68008309720, 67818859725 ],\n \"samples_ts\": [ 7.50201, 7.52849, 7.54952 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T18:30:56Z\",\n \"avg_ns\": 25330850614,\n \"stddev_ns\": 6767686,\n \"avg_ts\": 5.053127,\n \"stddev_ts\": 0.001350,\n \"samples_ns\": [ 25323168006, 25335930760, 25333453076 ],\n \"samples_ts\": [ 5.05466, 5.05211, 5.05261 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T18:26:23Z", + "avg_ns": 68025168952, + "stddev_ns": 215234788, + "avg_ts": 7.526676, + "stddev_ts": 0.023806, + "samples_ns": [ + 68248337412, + 68008309720, + 67818859725 + ], + "samples_ts": [ + 7.50201, + 7.52849, + 7.54952 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T18:30:56Z", + "avg_ns": 25330850614, + "stddev_ns": 6767686, + "avg_ts": 5.053127, + "stddev_ts": 0.00135, + "samples_ns": [ + 25323168006, + 25335930760, + 25333453076 + ], + "samples_ts": [ + 5.05466, + 5.05211, + 5.05261 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1226 + }, + { + "timestamp_utc": "2025-12-11T18:41:54.362848+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T18:32:13Z\",\n \"avg_ns\": 68273173357,\n \"stddev_ns\": 8990819,\n \"avg_ts\": 7.499285,\n \"stddev_ts\": 0.000987,\n \"samples_ns\": [ 68273368385, 68264094209, 68282057479 ],\n \"samples_ts\": [ 7.49926, 7.50028, 7.49831 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T18:36:46Z\",\n \"avg_ns\": 102418810277,\n \"stddev_ns\": 121004656,\n \"avg_ts\": 4.999086,\n \"stddev_ts\": 0.005904,\n \"samples_ns\": [ 102313336934, 102392184160, 102550909739 ],\n \"samples_ts\": [ 5.00424, 5.00038, 4.99264 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T18:32:13Z", + "avg_ns": 68273173357, + "stddev_ns": 8990819, + "avg_ts": 7.499285, + "stddev_ts": 0.000987, + "samples_ns": [ + 68273368385, + 68264094209, + 68282057479 + ], + "samples_ts": [ + 7.49926, + 7.50028, + 7.49831 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T18:36:46Z", + "avg_ns": 102418810277, + "stddev_ns": 121004656, + "avg_ts": 4.999086, + "stddev_ts": 0.005904, + "samples_ns": [ + 102313336934, + 102392184160, + 102550909739 + ], + "samples_ts": [ + 5.00424, + 5.00038, + 4.99264 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1227 + }, + { + "timestamp_utc": "2025-12-11T18:44:19.714020+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T18:41:55Z\",\n \"avg_ns\": 17014632912,\n \"stddev_ns\": 861782,\n \"avg_ts\": 7.522936,\n \"stddev_ts\": 0.000372,\n \"samples_ns\": [ 17014731693, 17013746073, 17015420972 ],\n \"samples_ts\": [ 7.52289, 7.52333, 7.52259 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T18:43:03Z\",\n \"avg_ns\": 25274261994,\n \"stddev_ns\": 8503140,\n \"avg_ts\": 5.064441,\n \"stddev_ts\": 0.001704,\n \"samples_ns\": [ 25265830252, 25282834778, 25274120952 ],\n \"samples_ts\": [ 5.06613, 5.06272, 5.06447 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T18:41:55Z", + "avg_ns": 17014632912, + "stddev_ns": 861782, + "avg_ts": 7.522936, + "stddev_ts": 0.000372, + "samples_ns": [ + 17014731693, + 17013746073, + 17015420972 + ], + "samples_ts": [ + 7.52289, + 7.52333, + 7.52259 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T18:43:03Z", + "avg_ns": 25274261994, + "stddev_ns": 8503140, + "avg_ts": 5.064441, + "stddev_ts": 0.001704, + "samples_ns": [ + 25265830252, + 25282834778, + 25274120952 + ], + "samples_ts": [ + 5.06613, + 5.06272, + 5.06447 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1228 + }, + { + "timestamp_utc": "2025-12-11T18:50:36.219195+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T18:44:20Z\",\n \"avg_ns\": 17023846562,\n \"stddev_ns\": 5499531,\n \"avg_ts\": 7.518865,\n \"stddev_ts\": 0.002429,\n \"samples_ns\": [ 17029100833, 17024308006, 17018130847 ],\n \"samples_ts\": [ 7.51654, 7.51866, 7.52139 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T18:45:28Z\",\n \"avg_ns\": 102310158659,\n \"stddev_ns\": 16800118,\n \"avg_ts\": 5.004391,\n \"stddev_ts\": 0.000821,\n \"samples_ns\": [ 102309401749, 102293755885, 102327318345 ],\n \"samples_ts\": [ 5.00443, 5.00519, 5.00355 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T18:44:20Z", + "avg_ns": 17023846562, + "stddev_ns": 5499531, + "avg_ts": 7.518865, + "stddev_ts": 0.002429, + "samples_ns": [ + 17029100833, + 17024308006, + 17018130847 + ], + "samples_ts": [ + 7.51654, + 7.51866, + 7.52139 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T18:45:28Z", + "avg_ns": 102310158659, + "stddev_ns": 16800118, + "avg_ts": 5.004391, + "stddev_ts": 0.000821, + "samples_ns": [ + 102309401749, + 102293755885, + 102327318345 + ], + "samples_ts": [ + 5.00443, + 5.00519, + 5.00355 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1229 + }, + { + "timestamp_utc": "2025-12-11T18:56:27.535961+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T18:50:37Z\",\n \"avg_ns\": 68468964978,\n \"stddev_ns\": 7194969,\n \"avg_ts\": 7.477841,\n \"stddev_ts\": 0.000786,\n \"samples_ns\": [ 68462537867, 68476737784, 68467619283 ],\n \"samples_ts\": [ 7.47854, 7.47699, 7.47799 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T18:55:11Z\",\n \"avg_ns\": 25298953259,\n \"stddev_ns\": 15991690,\n \"avg_ts\": 5.059499,\n \"stddev_ts\": 0.003198,\n \"samples_ns\": [ 25282528816, 25299860564, 25314470399 ],\n \"samples_ts\": [ 5.06278, 5.05932, 5.0564 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T18:50:37Z", + "avg_ns": 68468964978, + "stddev_ns": 7194969, + "avg_ts": 7.477841, + "stddev_ts": 0.000786, + "samples_ns": [ + 68462537867, + 68476737784, + 68467619283 + ], + "samples_ts": [ + 7.47854, + 7.47699, + 7.47799 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T18:55:11Z", + "avg_ns": 25298953259, + "stddev_ns": 15991690, + "avg_ts": 5.059499, + "stddev_ts": 0.003198, + "samples_ns": [ + 25282528816, + 25299860564, + 25314470399 + ], + "samples_ts": [ + 5.06278, + 5.05932, + 5.0564 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1230 + }, + { + "timestamp_utc": "2025-12-11T19:06:09.870462+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T18:56:28Z\",\n \"avg_ns\": 68372546845,\n \"stddev_ns\": 109459779,\n \"avg_ts\": 7.488398,\n \"stddev_ts\": 0.011998,\n \"samples_ns\": [ 68454213557, 68415255611, 68248171368 ],\n \"samples_ts\": [ 7.47945, 7.48371, 7.50203 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T19:01:02Z\",\n \"avg_ns\": 102416908191,\n \"stddev_ns\": 3875645,\n \"avg_ts\": 4.999175,\n \"stddev_ts\": 0.000188,\n \"samples_ns\": [ 102420450576, 102417461743, 102412812256 ],\n \"samples_ts\": [ 4.999, 4.99915, 4.99937 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T18:56:28Z", + "avg_ns": 68372546845, + "stddev_ns": 109459779, + "avg_ts": 7.488398, + "stddev_ts": 0.011998, + "samples_ns": [ + 68454213557, + 68415255611, + 68248171368 + ], + "samples_ts": [ + 7.47945, + 7.48371, + 7.50203 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T19:01:02Z", + "avg_ns": 102416908191, + "stddev_ns": 3875645, + "avg_ts": 4.999175, + "stddev_ts": 0.000188, + "samples_ns": [ + 102420450576, + 102417461743, + 102412812256 + ], + "samples_ts": [ + 4.999, + 4.99915, + 4.99937 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1231 + }, + { + "timestamp_utc": "2025-12-11T19:08:35.182195+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T19:06:11Z\",\n \"avg_ns\": 17019118292,\n \"stddev_ns\": 3789735,\n \"avg_ts\": 7.520954,\n \"stddev_ts\": 0.001674,\n \"samples_ns\": [ 17021869150, 17020687292, 17014798435 ],\n \"samples_ts\": [ 7.51974, 7.52026, 7.52286 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T19:07:19Z\",\n \"avg_ns\": 25252614908,\n \"stddev_ns\": 16432822,\n \"avg_ts\": 5.068783,\n \"stddev_ts\": 0.003298,\n \"samples_ns\": [ 25249143864, 25238194888, 25270505972 ],\n \"samples_ts\": [ 5.06948, 5.07168, 5.06519 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T19:06:11Z", + "avg_ns": 17019118292, + "stddev_ns": 3789735, + "avg_ts": 7.520954, + "stddev_ts": 0.001674, + "samples_ns": [ + 17021869150, + 17020687292, + 17014798435 + ], + "samples_ts": [ + 7.51974, + 7.52026, + 7.52286 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T19:07:19Z", + "avg_ns": 25252614908, + "stddev_ns": 16432822, + "avg_ts": 5.068783, + "stddev_ts": 0.003298, + "samples_ns": [ + 25249143864, + 25238194888, + 25270505972 + ], + "samples_ts": [ + 5.06948, + 5.07168, + 5.06519 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1232 + }, + { + "timestamp_utc": "2025-12-11T19:14:51.898024+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T19:08:36Z\",\n \"avg_ns\": 17018224421,\n \"stddev_ns\": 2083007,\n \"avg_ts\": 7.521349,\n \"stddev_ts\": 0.000919,\n \"samples_ns\": [ 17020165489, 17016030738, 17018477037 ],\n \"samples_ts\": [ 7.52049, 7.52232, 7.52124 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T19:09:44Z\",\n \"avg_ns\": 102369023198,\n \"stddev_ns\": 74246871,\n \"avg_ts\": 5.001515,\n \"stddev_ts\": 0.003628,\n \"samples_ns\": [ 102372855310, 102292935859, 102441278427 ],\n \"samples_ts\": [ 5.00133, 5.00523, 4.99799 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T19:08:36Z", + "avg_ns": 17018224421, + "stddev_ns": 2083007, + "avg_ts": 7.521349, + "stddev_ts": 0.000919, + "samples_ns": [ + 17020165489, + 17016030738, + 17018477037 + ], + "samples_ts": [ + 7.52049, + 7.52232, + 7.52124 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T19:09:44Z", + "avg_ns": 102369023198, + "stddev_ns": 74246871, + "avg_ts": 5.001515, + "stddev_ts": 0.003628, + "samples_ns": [ + 102372855310, + 102292935859, + 102441278427 + ], + "samples_ts": [ + 5.00133, + 5.00523, + 4.99799 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1233 + }, + { + "timestamp_utc": "2025-12-11T19:20:45.119827+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T19:14:53Z\",\n \"avg_ns\": 68946141188,\n \"stddev_ns\": 36273878,\n \"avg_ts\": 7.426088,\n \"stddev_ts\": 0.003907,\n \"samples_ns\": [ 68907598854, 68951211628, 68979613082 ],\n \"samples_ts\": [ 7.43024, 7.42554, 7.42248 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T19:19:28Z\",\n \"avg_ns\": 25299475585,\n \"stddev_ns\": 7941565,\n \"avg_ts\": 5.059394,\n \"stddev_ts\": 0.001588,\n \"samples_ns\": [ 25302775507, 25305231754, 25290419496 ],\n \"samples_ts\": [ 5.05873, 5.05824, 5.06121 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T19:14:53Z", + "avg_ns": 68946141188, + "stddev_ns": 36273878, + "avg_ts": 7.426088, + "stddev_ts": 0.003907, + "samples_ns": [ + 68907598854, + 68951211628, + 68979613082 + ], + "samples_ts": [ + 7.43024, + 7.42554, + 7.42248 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T19:19:28Z", + "avg_ns": 25299475585, + "stddev_ns": 7941565, + "avg_ts": 5.059394, + "stddev_ts": 0.001588, + "samples_ns": [ + 25302775507, + 25305231754, + 25290419496 + ], + "samples_ts": [ + 5.05873, + 5.05824, + 5.06121 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1234 + }, + { + "timestamp_utc": "2025-12-11T19:30:29.437722+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T19:20:46Z\",\n \"avg_ns\": 68970977639,\n \"stddev_ns\": 12955087,\n \"avg_ts\": 7.423412,\n \"stddev_ts\": 0.001394,\n \"samples_ns\": [ 68981193639, 68975325927, 68956413353 ],\n \"samples_ts\": [ 7.42231, 7.42294, 7.42498 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T19:25:22Z\",\n \"avg_ns\": 102305931932,\n \"stddev_ns\": 8968079,\n \"avg_ts\": 5.004597,\n \"stddev_ts\": 0.000438,\n \"samples_ns\": [ 102296847881, 102314767460, 102306180456 ],\n \"samples_ts\": [ 5.00504, 5.00417, 5.00459 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T19:20:46Z", + "avg_ns": 68970977639, + "stddev_ns": 12955087, + "avg_ts": 7.423412, + "stddev_ts": 0.001394, + "samples_ns": [ + 68981193639, + 68975325927, + 68956413353 + ], + "samples_ts": [ + 7.42231, + 7.42294, + 7.42498 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T19:25:22Z", + "avg_ns": 102305931932, + "stddev_ns": 8968079, + "avg_ts": 5.004597, + "stddev_ts": 0.000438, + "samples_ns": [ + 102296847881, + 102314767460, + 102306180456 + ], + "samples_ts": [ + 5.00504, + 5.00417, + 5.00459 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1235 + }, + { + "timestamp_utc": "2025-12-11T19:32:54.905355+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T19:30:30Z\",\n \"avg_ns\": 17016941907,\n \"stddev_ns\": 1231143,\n \"avg_ts\": 7.521916,\n \"stddev_ts\": 0.000544,\n \"samples_ns\": [ 17017937817, 17017322494, 17015565410 ],\n \"samples_ts\": [ 7.52148, 7.52175, 7.52252 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T19:31:38Z\",\n \"avg_ns\": 25307544054,\n \"stddev_ns\": 4335414,\n \"avg_ts\": 5.057780,\n \"stddev_ts\": 0.000865,\n \"samples_ns\": [ 25305012533, 25312543279, 25305076352 ],\n \"samples_ts\": [ 5.05829, 5.05678, 5.05827 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T19:30:30Z", + "avg_ns": 17016941907, + "stddev_ns": 1231143, + "avg_ts": 7.521916, + "stddev_ts": 0.000544, + "samples_ns": [ + 17017937817, + 17017322494, + 17015565410 + ], + "samples_ts": [ + 7.52148, + 7.52175, + 7.52252 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T19:31:38Z", + "avg_ns": 25307544054, + "stddev_ns": 4335414, + "avg_ts": 5.05778, + "stddev_ts": 0.000865, + "samples_ns": [ + 25305012533, + 25312543279, + 25305076352 + ], + "samples_ts": [ + 5.05829, + 5.05678, + 5.05827 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1236 + }, + { + "timestamp_utc": "2025-12-11T19:39:11.494517+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T19:32:56Z\",\n \"avg_ns\": 17012880085,\n \"stddev_ns\": 1141845,\n \"avg_ts\": 7.523711,\n \"stddev_ts\": 0.000502,\n \"samples_ns\": [ 17011920004, 17012588452, 17014131800 ],\n \"samples_ts\": [ 7.52414, 7.52384, 7.52316 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T19:34:04Z\",\n \"avg_ns\": 102343334459,\n \"stddev_ns\": 33919472,\n \"avg_ts\": 5.002769,\n \"stddev_ts\": 0.001658,\n \"samples_ns\": [ 102324274228, 102323235931, 102382493220 ],\n \"samples_ts\": [ 5.0037, 5.00375, 5.00085 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T19:32:56Z", + "avg_ns": 17012880085, + "stddev_ns": 1141845, + "avg_ts": 7.523711, + "stddev_ts": 0.000502, + "samples_ns": [ + 17011920004, + 17012588452, + 17014131800 + ], + "samples_ts": [ + 7.52414, + 7.52384, + 7.52316 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T19:34:04Z", + "avg_ns": 102343334459, + "stddev_ns": 33919472, + "avg_ts": 5.002769, + "stddev_ts": 0.001658, + "samples_ns": [ + 102324274228, + 102323235931, + 102382493220 + ], + "samples_ts": [ + 5.0037, + 5.00375, + 5.00085 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1237 + }, + { + "timestamp_utc": "2025-12-11T19:45:01.604979+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T19:39:12Z\",\n \"avg_ns\": 68237887523,\n \"stddev_ns\": 2667391,\n \"avg_ts\": 7.503163,\n \"stddev_ts\": 0.000293,\n \"samples_ns\": [ 68237993534, 68235168706, 68240500329 ],\n \"samples_ts\": [ 7.50315, 7.50346, 7.50288 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T19:43:45Z\",\n \"avg_ns\": 25242865232,\n \"stddev_ns\": 441468,\n \"avg_ts\": 5.070740,\n \"stddev_ts\": 0.000089,\n \"samples_ns\": [ 25242391710, 25242938508, 25243265478 ],\n \"samples_ts\": [ 5.07083, 5.07073, 5.07066 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T19:39:12Z", + "avg_ns": 68237887523, + "stddev_ns": 2667391, + "avg_ts": 7.503163, + "stddev_ts": 0.000293, + "samples_ns": [ + 68237993534, + 68235168706, + 68240500329 + ], + "samples_ts": [ + 7.50315, + 7.50346, + 7.50288 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T19:43:45Z", + "avg_ns": 25242865232, + "stddev_ns": 441468, + "avg_ts": 5.07074, + "stddev_ts": 8.9e-05, + "samples_ns": [ + 25242391710, + 25242938508, + 25243265478 + ], + "samples_ts": [ + 5.07083, + 5.07073, + 5.07066 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1238 + }, + { + "timestamp_utc": "2025-12-11T19:54:43.124058+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T19:45:02Z\",\n \"avg_ns\": 68244325720,\n \"stddev_ns\": 5055763,\n \"avg_ts\": 7.502455,\n \"stddev_ts\": 0.000555,\n \"samples_ns\": [ 68244103413, 68249482212, 68239391536 ],\n \"samples_ts\": [ 7.50248, 7.50189, 7.503 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T19:49:35Z\",\n \"avg_ns\": 102361774245,\n \"stddev_ns\": 133555304,\n \"avg_ts\": 5.001873,\n \"stddev_ts\": 0.006531,\n \"samples_ns\": [ 102449970615, 102427233649, 102208118473 ],\n \"samples_ts\": [ 4.99756, 4.99867, 5.00939 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T19:45:02Z", + "avg_ns": 68244325720, + "stddev_ns": 5055763, + "avg_ts": 7.502455, + "stddev_ts": 0.000555, + "samples_ns": [ + 68244103413, + 68249482212, + 68239391536 + ], + "samples_ts": [ + 7.50248, + 7.50189, + 7.503 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T19:49:35Z", + "avg_ns": 102361774245, + "stddev_ns": 133555304, + "avg_ts": 5.001873, + "stddev_ts": 0.006531, + "samples_ns": [ + 102449970615, + 102427233649, + 102208118473 + ], + "samples_ts": [ + 4.99756, + 4.99867, + 5.00939 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1239 + }, + { + "timestamp_utc": "2025-12-11T19:57:08.653358+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T19:54:44Z\",\n \"avg_ns\": 17018637938,\n \"stddev_ns\": 1777279,\n \"avg_ts\": 7.521166,\n \"stddev_ts\": 0.000781,\n \"samples_ns\": [ 17019817700, 17019490558, 17016605558 ],\n \"samples_ts\": [ 7.52064, 7.52079, 7.52206 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T19:55:52Z\",\n \"avg_ns\": 25334991416,\n \"stddev_ns\": 850032,\n \"avg_ts\": 5.052301,\n \"stddev_ts\": 0.000167,\n \"samples_ns\": [ 25334738921, 25335923521, 25334311807 ],\n \"samples_ts\": [ 5.05235, 5.05212, 5.05244 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T19:54:44Z", + "avg_ns": 17018637938, + "stddev_ns": 1777279, + "avg_ts": 7.521166, + "stddev_ts": 0.000781, + "samples_ns": [ + 17019817700, + 17019490558, + 17016605558 + ], + "samples_ts": [ + 7.52064, + 7.52079, + 7.52206 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T19:55:52Z", + "avg_ns": 25334991416, + "stddev_ns": 850032, + "avg_ts": 5.052301, + "stddev_ts": 0.000167, + "samples_ns": [ + 25334738921, + 25335923521, + 25334311807 + ], + "samples_ts": [ + 5.05235, + 5.05212, + 5.05244 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1240 + }, + { + "timestamp_utc": "2025-12-11T20:03:24.760453+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T19:57:09Z\",\n \"avg_ns\": 17018013322,\n \"stddev_ns\": 4808133,\n \"avg_ts\": 7.521442,\n \"stddev_ts\": 0.002124,\n \"samples_ns\": [ 17021285237, 17020257449, 17012497282 ],\n \"samples_ts\": [ 7.52, 7.52045, 7.52388 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T19:58:17Z\",\n \"avg_ns\": 102186183963,\n \"stddev_ns\": 15254639,\n \"avg_ts\": 5.010462,\n \"stddev_ts\": 0.000748,\n \"samples_ns\": [ 102191295054, 102198223260, 102169033576 ],\n \"samples_ts\": [ 5.01021, 5.00987, 5.0113 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T19:57:09Z", + "avg_ns": 17018013322, + "stddev_ns": 4808133, + "avg_ts": 7.521442, + "stddev_ts": 0.002124, + "samples_ns": [ + 17021285237, + 17020257449, + 17012497282 + ], + "samples_ts": [ + 7.52, + 7.52045, + 7.52388 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T19:58:17Z", + "avg_ns": 102186183963, + "stddev_ns": 15254639, + "avg_ts": 5.010462, + "stddev_ts": 0.000748, + "samples_ns": [ + 102191295054, + 102198223260, + 102169033576 + ], + "samples_ts": [ + 5.01021, + 5.00987, + 5.0113 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1241 + }, + { + "timestamp_utc": "2025-12-11T20:09:15.791302+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T20:03:25Z\",\n \"avg_ns\": 68449616852,\n \"stddev_ns\": 3605558,\n \"avg_ts\": 7.479954,\n \"stddev_ts\": 0.000393,\n \"samples_ns\": [ 68446267571, 68453417131, 68449165855 ],\n \"samples_ts\": [ 7.48032, 7.47954, 7.48 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T20:07:59Z\",\n \"avg_ns\": 25253284882,\n \"stddev_ns\": 5062155,\n \"avg_ts\": 5.068648,\n \"stddev_ts\": 0.001016,\n \"samples_ns\": [ 25258996339, 25249352184, 25251506123 ],\n \"samples_ts\": [ 5.0675, 5.06944, 5.069 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T20:03:25Z", + "avg_ns": 68449616852, + "stddev_ns": 3605558, + "avg_ts": 7.479954, + "stddev_ts": 0.000393, + "samples_ns": [ + 68446267571, + 68453417131, + 68449165855 + ], + "samples_ts": [ + 7.48032, + 7.47954, + 7.48 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T20:07:59Z", + "avg_ns": 25253284882, + "stddev_ns": 5062155, + "avg_ts": 5.068648, + "stddev_ts": 0.001016, + "samples_ns": [ + 25258996339, + 25249352184, + 25251506123 + ], + "samples_ts": [ + 5.0675, + 5.06944, + 5.069 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1242 + }, + { + "timestamp_utc": "2025-12-11T20:18:57.753963+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T20:09:16Z\",\n \"avg_ns\": 68409195820,\n \"stddev_ns\": 12606664,\n \"avg_ts\": 7.484374,\n \"stddev_ts\": 0.001379,\n \"samples_ns\": [ 68396815308, 68422006753, 68408765401 ],\n \"samples_ts\": [ 7.48573, 7.48297, 7.48442 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T20:13:50Z\",\n \"avg_ns\": 102281048667,\n \"stddev_ns\": 35746498,\n \"avg_ts\": 5.005815,\n \"stddev_ts\": 0.001749,\n \"samples_ns\": [ 102265944390, 102255333588, 102321868023 ],\n \"samples_ts\": [ 5.00655, 5.00707, 5.00382 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T20:09:16Z", + "avg_ns": 68409195820, + "stddev_ns": 12606664, + "avg_ts": 7.484374, + "stddev_ts": 0.001379, + "samples_ns": [ + 68396815308, + 68422006753, + 68408765401 + ], + "samples_ts": [ + 7.48573, + 7.48297, + 7.48442 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T20:13:50Z", + "avg_ns": 102281048667, + "stddev_ns": 35746498, + "avg_ts": 5.005815, + "stddev_ts": 0.001749, + "samples_ns": [ + 102265944390, + 102255333588, + 102321868023 + ], + "samples_ts": [ + 5.00655, + 5.00707, + 5.00382 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1243 + }, + { + "timestamp_utc": "2025-12-11T20:21:23.223723+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T20:18:58Z\",\n \"avg_ns\": 17016903022,\n \"stddev_ns\": 3920239,\n \"avg_ts\": 7.521933,\n \"stddev_ts\": 0.001732,\n \"samples_ns\": [ 17012408287, 17018703987, 17019596793 ],\n \"samples_ts\": [ 7.52392, 7.52114, 7.52074 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T20:20:06Z\",\n \"avg_ns\": 25322377826,\n \"stddev_ns\": 7034810,\n \"avg_ts\": 5.054818,\n \"stddev_ts\": 0.001404,\n \"samples_ns\": [ 25330132779, 25320588116, 25316412584 ],\n \"samples_ts\": [ 5.05327, 5.05517, 5.05601 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T20:18:58Z", + "avg_ns": 17016903022, + "stddev_ns": 3920239, + "avg_ts": 7.521933, + "stddev_ts": 0.001732, + "samples_ns": [ + 17012408287, + 17018703987, + 17019596793 + ], + "samples_ts": [ + 7.52392, + 7.52114, + 7.52074 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T20:20:06Z", + "avg_ns": 25322377826, + "stddev_ns": 7034810, + "avg_ts": 5.054818, + "stddev_ts": 0.001404, + "samples_ns": [ + 25330132779, + 25320588116, + 25316412584 + ], + "samples_ts": [ + 5.05327, + 5.05517, + 5.05601 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1244 + }, + { + "timestamp_utc": "2025-12-11T20:27:39.435347+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T20:21:24Z\",\n \"avg_ns\": 17008839346,\n \"stddev_ns\": 2252270,\n \"avg_ts\": 7.525499,\n \"stddev_ts\": 0.000997,\n \"samples_ns\": [ 17009876044, 17006255406, 17010386588 ],\n \"samples_ts\": [ 7.52504, 7.52664, 7.52481 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T20:22:32Z\",\n \"avg_ns\": 102242546195,\n \"stddev_ns\": 29667695,\n \"avg_ts\": 5.007700,\n \"stddev_ts\": 0.001453,\n \"samples_ns\": [ 102275517619, 102234106217, 102218014750 ],\n \"samples_ts\": [ 5.00609, 5.00811, 5.0089 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T20:21:24Z", + "avg_ns": 17008839346, + "stddev_ns": 2252270, + "avg_ts": 7.525499, + "stddev_ts": 0.000997, + "samples_ns": [ + 17009876044, + 17006255406, + 17010386588 + ], + "samples_ts": [ + 7.52504, + 7.52664, + 7.52481 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T20:22:32Z", + "avg_ns": 102242546195, + "stddev_ns": 29667695, + "avg_ts": 5.0077, + "stddev_ts": 0.001453, + "samples_ns": [ + 102275517619, + 102234106217, + 102218014750 + ], + "samples_ts": [ + 5.00609, + 5.00811, + 5.0089 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1245 + }, + { + "timestamp_utc": "2025-12-11T20:33:32.559128+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T20:27:40Z\",\n \"avg_ns\": 68950806513,\n \"stddev_ns\": 2107784,\n \"avg_ts\": 7.425584,\n \"stddev_ts\": 0.000225,\n \"samples_ns\": [ 68948489134, 68951376884, 68952553522 ],\n \"samples_ts\": [ 7.42583, 7.42552, 7.4254 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T20:32:16Z\",\n \"avg_ns\": 25286030698,\n \"stddev_ns\": 8347608,\n \"avg_ts\": 5.062084,\n \"stddev_ts\": 0.001671,\n \"samples_ns\": [ 25282166111, 25295608645, 25280317339 ],\n \"samples_ts\": [ 5.06286, 5.06017, 5.06323 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T20:27:40Z", + "avg_ns": 68950806513, + "stddev_ns": 2107784, + "avg_ts": 7.425584, + "stddev_ts": 0.000225, + "samples_ns": [ + 68948489134, + 68951376884, + 68952553522 + ], + "samples_ts": [ + 7.42583, + 7.42552, + 7.4254 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T20:32:16Z", + "avg_ns": 25286030698, + "stddev_ns": 8347608, + "avg_ts": 5.062084, + "stddev_ts": 0.001671, + "samples_ns": [ + 25282166111, + 25295608645, + 25280317339 + ], + "samples_ts": [ + 5.06286, + 5.06017, + 5.06323 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1246 + }, + { + "timestamp_utc": "2025-12-11T20:43:17.476999+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T20:33:33Z\",\n \"avg_ns\": 68955164379,\n \"stddev_ns\": 9831867,\n \"avg_ts\": 7.425115,\n \"stddev_ts\": 0.001058,\n \"samples_ns\": [ 68966044376, 68946941045, 68952507718 ],\n \"samples_ts\": [ 7.42394, 7.426, 7.4254 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T20:38:09Z\",\n \"avg_ns\": 102542610453,\n \"stddev_ns\": 87047337,\n \"avg_ts\": 4.993049,\n \"stddev_ts\": 0.004238,\n \"samples_ns\": [ 102456028967, 102630116281, 102541686111 ],\n \"samples_ts\": [ 4.99727, 4.98879, 4.99309 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T20:33:33Z", + "avg_ns": 68955164379, + "stddev_ns": 9831867, + "avg_ts": 7.425115, + "stddev_ts": 0.001058, + "samples_ns": [ + 68966044376, + 68946941045, + 68952507718 + ], + "samples_ts": [ + 7.42394, + 7.426, + 7.4254 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T20:38:09Z", + "avg_ns": 102542610453, + "stddev_ns": 87047337, + "avg_ts": 4.993049, + "stddev_ts": 0.004238, + "samples_ns": [ + 102456028967, + 102630116281, + 102541686111 + ], + "samples_ts": [ + 4.99727, + 4.98879, + 4.99309 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1247 + }, + { + "timestamp_utc": "2025-12-11T20:45:42.801331+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T20:43:18Z\",\n \"avg_ns\": 17013837641,\n \"stddev_ns\": 1487067,\n \"avg_ts\": 7.523288,\n \"stddev_ts\": 0.000655,\n \"samples_ns\": [ 17015076032, 17014240282, 17012196610 ],\n \"samples_ts\": [ 7.52274, 7.52311, 7.52401 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T20:44:26Z\",\n \"avg_ns\": 25267438729,\n \"stddev_ns\": 5337704,\n \"avg_ts\": 5.065808,\n \"stddev_ts\": 0.001070,\n \"samples_ns\": [ 25273559029, 25263748073, 25265009085 ],\n \"samples_ts\": [ 5.06458, 5.06655, 5.0663 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T20:43:18Z", + "avg_ns": 17013837641, + "stddev_ns": 1487067, + "avg_ts": 7.523288, + "stddev_ts": 0.000655, + "samples_ns": [ + 17015076032, + 17014240282, + 17012196610 + ], + "samples_ts": [ + 7.52274, + 7.52311, + 7.52401 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T20:44:26Z", + "avg_ns": 25267438729, + "stddev_ns": 5337704, + "avg_ts": 5.065808, + "stddev_ts": 0.00107, + "samples_ns": [ + 25273559029, + 25263748073, + 25265009085 + ], + "samples_ts": [ + 5.06458, + 5.06655, + 5.0663 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1248 + }, + { + "timestamp_utc": "2025-12-11T20:51:59.435077+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T20:45:43Z\",\n \"avg_ns\": 17010867936,\n \"stddev_ns\": 2819298,\n \"avg_ts\": 7.524601,\n \"stddev_ts\": 0.001245,\n \"samples_ns\": [ 17012486875, 17012497463, 17007619472 ],\n \"samples_ts\": [ 7.52389, 7.52388, 7.52604 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T20:46:52Z\",\n \"avg_ns\": 102378090309,\n \"stddev_ns\": 73884248,\n \"avg_ts\": 5.001072,\n \"stddev_ts\": 0.003608,\n \"samples_ns\": [ 102343045085, 102328250811, 102462975032 ],\n \"samples_ts\": [ 5.00278, 5.00351, 4.99693 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T20:45:43Z", + "avg_ns": 17010867936, + "stddev_ns": 2819298, + "avg_ts": 7.524601, + "stddev_ts": 0.001245, + "samples_ns": [ + 17012486875, + 17012497463, + 17007619472 + ], + "samples_ts": [ + 7.52389, + 7.52388, + 7.52604 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T20:46:52Z", + "avg_ns": 102378090309, + "stddev_ns": 73884248, + "avg_ts": 5.001072, + "stddev_ts": 0.003608, + "samples_ns": [ + 102343045085, + 102328250811, + 102462975032 + ], + "samples_ts": [ + 5.00278, + 5.00351, + 4.99693 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1249 + }, + { + "timestamp_utc": "2025-12-11T20:57:49.530308+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T20:52:00Z\",\n \"avg_ns\": 68228104778,\n \"stddev_ns\": 2358733,\n \"avg_ts\": 7.504239,\n \"stddev_ts\": 0.000258,\n \"samples_ns\": [ 68229643655, 68229263886, 68225406794 ],\n \"samples_ts\": [ 7.50407, 7.50411, 7.50454 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T20:56:33Z\",\n \"avg_ns\": 25243496831,\n \"stddev_ns\": 9196486,\n \"avg_ts\": 5.070613,\n \"stddev_ts\": 0.001847,\n \"samples_ns\": [ 25234849702, 25253156170, 25242484622 ],\n \"samples_ts\": [ 5.07235, 5.06867, 5.07082 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T20:52:00Z", + "avg_ns": 68228104778, + "stddev_ns": 2358733, + "avg_ts": 7.504239, + "stddev_ts": 0.000258, + "samples_ns": [ + 68229643655, + 68229263886, + 68225406794 + ], + "samples_ts": [ + 7.50407, + 7.50411, + 7.50454 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T20:56:33Z", + "avg_ns": 25243496831, + "stddev_ns": 9196486, + "avg_ts": 5.070613, + "stddev_ts": 0.001847, + "samples_ns": [ + 25234849702, + 25253156170, + 25242484622 + ], + "samples_ts": [ + 5.07235, + 5.06867, + 5.07082 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1250 + }, + { + "timestamp_utc": "2025-12-11T21:07:30.751769+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T20:57:50Z\",\n \"avg_ns\": 68234666946,\n \"stddev_ns\": 3568852,\n \"avg_ts\": 7.503517,\n \"stddev_ts\": 0.000390,\n \"samples_ns\": [ 68233726536, 68238592143, 68231682161 ],\n \"samples_ts\": [ 7.50362, 7.50309, 7.50385 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T21:02:23Z\",\n \"avg_ns\": 102276603921,\n \"stddev_ns\": 4242815,\n \"avg_ts\": 5.006032,\n \"stddev_ts\": 0.000206,\n \"samples_ns\": [ 102280105928, 102271920532, 102277785305 ],\n \"samples_ts\": [ 5.00586, 5.00626, 5.00597 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T20:57:50Z", + "avg_ns": 68234666946, + "stddev_ns": 3568852, + "avg_ts": 7.503517, + "stddev_ts": 0.00039, + "samples_ns": [ + 68233726536, + 68238592143, + 68231682161 + ], + "samples_ts": [ + 7.50362, + 7.50309, + 7.50385 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T21:02:23Z", + "avg_ns": 102276603921, + "stddev_ns": 4242815, + "avg_ts": 5.006032, + "stddev_ts": 0.000206, + "samples_ns": [ + 102280105928, + 102271920532, + 102277785305 + ], + "samples_ts": [ + 5.00586, + 5.00626, + 5.00597 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1251 + }, + { + "timestamp_utc": "2025-12-11T21:09:56.124514+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T21:07:31Z\",\n \"avg_ns\": 17019144942,\n \"stddev_ns\": 3819055,\n \"avg_ts\": 7.520942,\n \"stddev_ts\": 0.001687,\n \"samples_ns\": [ 17022021015, 17020599019, 17014814793 ],\n \"samples_ts\": [ 7.51967, 7.5203, 7.52286 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T21:08:40Z\",\n \"avg_ns\": 25278019297,\n \"stddev_ns\": 8495143,\n \"avg_ts\": 5.063688,\n \"stddev_ts\": 0.001702,\n \"samples_ns\": [ 25278573820, 25269261967, 25286222105 ],\n \"samples_ts\": [ 5.06358, 5.06544, 5.06205 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T21:07:31Z", + "avg_ns": 17019144942, + "stddev_ns": 3819055, + "avg_ts": 7.520942, + "stddev_ts": 0.001687, + "samples_ns": [ + 17022021015, + 17020599019, + 17014814793 + ], + "samples_ts": [ + 7.51967, + 7.5203, + 7.52286 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T21:08:40Z", + "avg_ns": 25278019297, + "stddev_ns": 8495143, + "avg_ts": 5.063688, + "stddev_ts": 0.001702, + "samples_ns": [ + 25278573820, + 25269261967, + 25286222105 + ], + "samples_ts": [ + 5.06358, + 5.06544, + 5.06205 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1252 + }, + { + "timestamp_utc": "2025-12-11T21:16:13.560396+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T21:09:57Z\",\n \"avg_ns\": 16999345821,\n \"stddev_ns\": 13987394,\n \"avg_ts\": 7.529705,\n \"stddev_ts\": 0.006197,\n \"samples_ns\": [ 17011650280, 16984132829, 17002254354 ],\n \"samples_ts\": [ 7.52426, 7.53645, 7.52841 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T21:11:05Z\",\n \"avg_ns\": 102655033582,\n \"stddev_ns\": 817609732,\n \"avg_ts\": 4.987788,\n \"stddev_ts\": 0.039544,\n \"samples_ns\": [ 102180673721, 102185302974, 103599124053 ],\n \"samples_ts\": [ 5.01073, 5.01051, 4.94213 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T21:09:57Z", + "avg_ns": 16999345821, + "stddev_ns": 13987394, + "avg_ts": 7.529705, + "stddev_ts": 0.006197, + "samples_ns": [ + 17011650280, + 16984132829, + 17002254354 + ], + "samples_ts": [ + 7.52426, + 7.53645, + 7.52841 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T21:11:05Z", + "avg_ns": 102655033582, + "stddev_ns": 817609732, + "avg_ts": 4.987788, + "stddev_ts": 0.039544, + "samples_ns": [ + 102180673721, + 102185302974, + 103599124053 + ], + "samples_ts": [ + 5.01073, + 5.01051, + 4.94213 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1253 + }, + { + "timestamp_utc": "2025-12-11T21:22:04.585317+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T21:16:14Z\",\n \"avg_ns\": 68449480541,\n \"stddev_ns\": 8132171,\n \"avg_ts\": 7.479969,\n \"stddev_ts\": 0.000889,\n \"samples_ns\": [ 68442011517, 68458143792, 68448286314 ],\n \"samples_ts\": [ 7.48079, 7.47902, 7.4801 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T21:20:48Z\",\n \"avg_ns\": 25257771980,\n \"stddev_ns\": 9971856,\n \"avg_ts\": 5.067748,\n \"stddev_ts\": 0.002000,\n \"samples_ns\": [ 25252535558, 25269269807, 25251510576 ],\n \"samples_ts\": [ 5.0688, 5.06544, 5.069 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T21:16:14Z", + "avg_ns": 68449480541, + "stddev_ns": 8132171, + "avg_ts": 7.479969, + "stddev_ts": 0.000889, + "samples_ns": [ + 68442011517, + 68458143792, + 68448286314 + ], + "samples_ts": [ + 7.48079, + 7.47902, + 7.4801 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T21:20:48Z", + "avg_ns": 25257771980, + "stddev_ns": 9971856, + "avg_ts": 5.067748, + "stddev_ts": 0.002, + "samples_ns": [ + 25252535558, + 25269269807, + 25251510576 + ], + "samples_ts": [ + 5.0688, + 5.06544, + 5.069 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1254 + }, + { + "timestamp_utc": "2025-12-11T21:31:46.393443+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T21:22:05Z\",\n \"avg_ns\": 68439663775,\n \"stddev_ns\": 26454402,\n \"avg_ts\": 7.481043,\n \"stddev_ts\": 0.002892,\n \"samples_ns\": [ 68455661787, 68454201031, 68409128507 ],\n \"samples_ts\": [ 7.47929, 7.47945, 7.48438 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T21:26:39Z\",\n \"avg_ns\": 102191821829,\n \"stddev_ns\": 42591213,\n \"avg_ts\": 5.010186,\n \"stddev_ts\": 0.002088,\n \"samples_ns\": [ 102239912470, 102176685887, 102158867131 ],\n \"samples_ts\": [ 5.00783, 5.01093, 5.0118 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T21:22:05Z", + "avg_ns": 68439663775, + "stddev_ns": 26454402, + "avg_ts": 7.481043, + "stddev_ts": 0.002892, + "samples_ns": [ + 68455661787, + 68454201031, + 68409128507 + ], + "samples_ts": [ + 7.47929, + 7.47945, + 7.48438 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T21:26:39Z", + "avg_ns": 102191821829, + "stddev_ns": 42591213, + "avg_ts": 5.010186, + "stddev_ts": 0.002088, + "samples_ns": [ + 102239912470, + 102176685887, + 102158867131 + ], + "samples_ts": [ + 5.00783, + 5.01093, + 5.0118 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1255 + }, + { + "timestamp_utc": "2025-12-11T21:34:11.679828+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T21:31:47Z\",\n \"avg_ns\": 17010361047,\n \"stddev_ns\": 4861293,\n \"avg_ts\": 7.524826,\n \"stddev_ts\": 0.002149,\n \"samples_ns\": [ 17012708090, 17004775418, 17013599635 ],\n \"samples_ts\": [ 7.52379, 7.5273, 7.52339 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T21:32:55Z\",\n \"avg_ns\": 25262339438,\n \"stddev_ns\": 10699900,\n \"avg_ts\": 5.066831,\n \"stddev_ts\": 0.002145,\n \"samples_ns\": [ 25274551790, 25254618808, 25257847717 ],\n \"samples_ts\": [ 5.06438, 5.06838, 5.06773 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T21:31:47Z", + "avg_ns": 17010361047, + "stddev_ns": 4861293, + "avg_ts": 7.524826, + "stddev_ts": 0.002149, + "samples_ns": [ + 17012708090, + 17004775418, + 17013599635 + ], + "samples_ts": [ + 7.52379, + 7.5273, + 7.52339 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T21:32:55Z", + "avg_ns": 25262339438, + "stddev_ns": 10699900, + "avg_ts": 5.066831, + "stddev_ts": 0.002145, + "samples_ns": [ + 25274551790, + 25254618808, + 25257847717 + ], + "samples_ts": [ + 5.06438, + 5.06838, + 5.06773 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1256 + }, + { + "timestamp_utc": "2025-12-11T21:40:29.307634+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T21:34:12Z\",\n \"avg_ns\": 17017951864,\n \"stddev_ns\": 2900637,\n \"avg_ts\": 7.521469,\n \"stddev_ts\": 0.001282,\n \"samples_ns\": [ 17018428394, 17014842469, 17020584729 ],\n \"samples_ts\": [ 7.52126, 7.52284, 7.52031 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T21:35:20Z\",\n \"avg_ns\": 102701350412,\n \"stddev_ns\": 708948310,\n \"avg_ts\": 4.985487,\n \"stddev_ts\": 0.034279,\n \"samples_ns\": [ 102321862398, 102262923156, 103519265684 ],\n \"samples_ts\": [ 5.00382, 5.0067, 4.94594 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T21:34:12Z", + "avg_ns": 17017951864, + "stddev_ns": 2900637, + "avg_ts": 7.521469, + "stddev_ts": 0.001282, + "samples_ns": [ + 17018428394, + 17014842469, + 17020584729 + ], + "samples_ts": [ + 7.52126, + 7.52284, + 7.52031 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T21:35:20Z", + "avg_ns": 102701350412, + "stddev_ns": 708948310, + "avg_ts": 4.985487, + "stddev_ts": 0.034279, + "samples_ns": [ + 102321862398, + 102262923156, + 103519265684 + ], + "samples_ts": [ + 5.00382, + 5.0067, + 4.94594 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1257 + }, + { + "timestamp_utc": "2025-12-11T21:46:22.378302+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T21:40:30Z\",\n \"avg_ns\": 68960255667,\n \"stddev_ns\": 3331976,\n \"avg_ts\": 7.424566,\n \"stddev_ts\": 0.000358,\n \"samples_ns\": [ 68964089472, 68958240975, 68958436555 ],\n \"samples_ts\": [ 7.42415, 7.42478, 7.42476 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T21:45:06Z\",\n \"avg_ns\": 25249038040,\n \"stddev_ns\": 6395808,\n \"avg_ts\": 5.069500,\n \"stddev_ts\": 0.001283,\n \"samples_ns\": [ 25255950701, 25247821625, 25243341796 ],\n \"samples_ts\": [ 5.06811, 5.06974, 5.07064 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T21:40:30Z", + "avg_ns": 68960255667, + "stddev_ns": 3331976, + "avg_ts": 7.424566, + "stddev_ts": 0.000358, + "samples_ns": [ + 68964089472, + 68958240975, + 68958436555 + ], + "samples_ts": [ + 7.42415, + 7.42478, + 7.42476 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T21:45:06Z", + "avg_ns": 25249038040, + "stddev_ns": 6395808, + "avg_ts": 5.0695, + "stddev_ts": 0.001283, + "samples_ns": [ + 25255950701, + 25247821625, + 25243341796 + ], + "samples_ts": [ + 5.06811, + 5.06974, + 5.07064 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1258 + }, + { + "timestamp_utc": "2025-12-11T21:56:06.972387+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T21:46:23Z\",\n \"avg_ns\": 68969073064,\n \"stddev_ns\": 5277191,\n \"avg_ts\": 7.423617,\n \"stddev_ts\": 0.000568,\n \"samples_ns\": [ 68974558025, 68964031637, 68968629530 ],\n \"samples_ts\": [ 7.42303, 7.42416, 7.42366 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T21:50:59Z\",\n \"avg_ns\": 102418827883,\n \"stddev_ns\": 234879902,\n \"avg_ts\": 4.999098,\n \"stddev_ts\": 0.011450,\n \"samples_ns\": [ 102272496712, 102294233874, 102689753064 ],\n \"samples_ts\": [ 5.00623, 5.00517, 4.98589 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T21:46:23Z", + "avg_ns": 68969073064, + "stddev_ns": 5277191, + "avg_ts": 7.423617, + "stddev_ts": 0.000568, + "samples_ns": [ + 68974558025, + 68964031637, + 68968629530 + ], + "samples_ts": [ + 7.42303, + 7.42416, + 7.42366 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T21:50:59Z", + "avg_ns": 102418827883, + "stddev_ns": 234879902, + "avg_ts": 4.999098, + "stddev_ts": 0.01145, + "samples_ns": [ + 102272496712, + 102294233874, + 102689753064 + ], + "samples_ts": [ + 5.00623, + 5.00517, + 4.98589 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1259 + }, + { + "timestamp_utc": "2025-12-11T21:58:09.205140+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T21:56:08Z\",\n \"avg_ns\": 12924695688,\n \"stddev_ns\": 10762586,\n \"avg_ts\": 9.903526,\n \"stddev_ts\": 0.008245,\n \"samples_ns\": [ 12936295835, 12915034175, 12922757054 ],\n \"samples_ts\": [ 9.89464, 9.91093, 9.90501 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T21:56:59Z\",\n \"avg_ns\": 23038240806,\n \"stddev_ns\": 58435865,\n \"avg_ts\": 5.556004,\n \"stddev_ts\": 0.014112,\n \"samples_ns\": [ 22971196640, 23078361156, 23065164623 ],\n \"samples_ts\": [ 5.5722, 5.54632, 5.54949 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T21:56:08Z", + "avg_ns": 12924695688, + "stddev_ns": 10762586, + "avg_ts": 9.903526, + "stddev_ts": 0.008245, + "samples_ns": [ + 12936295835, + 12915034175, + 12922757054 + ], + "samples_ts": [ + 9.89464, + 9.91093, + 9.90501 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T21:56:59Z", + "avg_ns": 23038240806, + "stddev_ns": 58435865, + "avg_ts": 5.556004, + "stddev_ts": 0.014112, + "samples_ns": [ + 22971196640, + 23078361156, + 23065164623 + ], + "samples_ts": [ + 5.5722, + 5.54632, + 5.54949 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1260 + }, + { + "timestamp_utc": "2025-12-11T22:03:43.687675+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T21:58:10Z\",\n \"avg_ns\": 12921741804,\n \"stddev_ns\": 6510995,\n \"avg_ts\": 9.905787,\n \"stddev_ts\": 0.004991,\n \"samples_ns\": [ 12923677095, 12927063691, 12914484628 ],\n \"samples_ts\": [ 9.9043, 9.90171, 9.91135 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T21:59:02Z\",\n \"avg_ns\": 93779955011,\n \"stddev_ns\": 65894277,\n \"avg_ts\": 5.459591,\n \"stddev_ts\": 0.003837,\n \"samples_ns\": [ 93796611217, 93707330844, 93835922972 ],\n \"samples_ts\": [ 5.45862, 5.46382, 5.45633 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T21:58:10Z", + "avg_ns": 12921741804, + "stddev_ns": 6510995, + "avg_ts": 9.905787, + "stddev_ts": 0.004991, + "samples_ns": [ + 12923677095, + 12927063691, + 12914484628 + ], + "samples_ts": [ + 9.9043, + 9.90171, + 9.91135 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T21:59:02Z", + "avg_ns": 93779955011, + "stddev_ns": 65894277, + "avg_ts": 5.459591, + "stddev_ts": 0.003837, + "samples_ns": [ + 93796611217, + 93707330844, + 93835922972 + ], + "samples_ts": [ + 5.45862, + 5.46382, + 5.45633 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1261 + }, + { + "timestamp_utc": "2025-12-11T22:08:21.734080+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T22:03:44Z\",\n \"avg_ns\": 51806090249,\n \"stddev_ns\": 22305441,\n \"avg_ts\": 9.883009,\n \"stddev_ts\": 0.004254,\n \"samples_ns\": [ 51830540302, 51786856150, 51800874296 ],\n \"samples_ts\": [ 9.87835, 9.88668, 9.884 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T22:07:12Z\",\n \"avg_ns\": 23130216386,\n \"stddev_ns\": 53052811,\n \"avg_ts\": 5.533906,\n \"stddev_ts\": 0.012710,\n \"samples_ns\": [ 23069003269, 23158756484, 23162889407 ],\n \"samples_ts\": [ 5.54857, 5.52707, 5.52608 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T22:03:44Z", + "avg_ns": 51806090249, + "stddev_ns": 22305441, + "avg_ts": 9.883009, + "stddev_ts": 0.004254, + "samples_ns": [ + 51830540302, + 51786856150, + 51800874296 + ], + "samples_ts": [ + 9.87835, + 9.88668, + 9.884 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T22:07:12Z", + "avg_ns": 23130216386, + "stddev_ns": 53052811, + "avg_ts": 5.533906, + "stddev_ts": 0.01271, + "samples_ns": [ + 23069003269, + 23158756484, + 23162889407 + ], + "samples_ts": [ + 5.54857, + 5.52707, + 5.52608 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1262 + }, + { + "timestamp_utc": "2025-12-11T22:16:31.455457+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T22:08:22Z\",\n \"avg_ns\": 51829222141,\n \"stddev_ns\": 13750735,\n \"avg_ts\": 9.878597,\n \"stddev_ts\": 0.002620,\n \"samples_ns\": [ 51843838649, 51816552232, 51827275544 ],\n \"samples_ts\": [ 9.87581, 9.88101, 9.87897 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T22:11:50Z\",\n \"avg_ns\": 93661747509,\n \"stddev_ns\": 68298392,\n \"avg_ts\": 5.466481,\n \"stddev_ts\": 0.003987,\n \"samples_ns\": [ 93585504938, 93717326910, 93682410681 ],\n \"samples_ts\": [ 5.47093, 5.46324, 5.46527 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T22:08:22Z", + "avg_ns": 51829222141, + "stddev_ns": 13750735, + "avg_ts": 9.878597, + "stddev_ts": 0.00262, + "samples_ns": [ + 51843838649, + 51816552232, + 51827275544 + ], + "samples_ts": [ + 9.87581, + 9.88101, + 9.87897 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T22:11:50Z", + "avg_ns": 93661747509, + "stddev_ns": 68298392, + "avg_ts": 5.466481, + "stddev_ts": 0.003987, + "samples_ns": [ + 93585504938, + 93717326910, + 93682410681 + ], + "samples_ts": [ + 5.47093, + 5.46324, + 5.46527 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1263 + }, + { + "timestamp_utc": "2025-12-11T22:18:33.747209+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T22:16:32Z\",\n \"avg_ns\": 12927932017,\n \"stddev_ns\": 9770752,\n \"avg_ts\": 9.901046,\n \"stddev_ts\": 0.007479,\n \"samples_ns\": [ 12923423634, 12921230175, 12939142243 ],\n \"samples_ts\": [ 9.9045, 9.90618, 9.89246 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T22:17:24Z\",\n \"avg_ns\": 23050065821,\n \"stddev_ns\": 39375965,\n \"avg_ts\": 5.553140,\n \"stddev_ts\": 0.009494,\n \"samples_ns\": [ 23005510898, 23064496971, 23080189596 ],\n \"samples_ts\": [ 5.56388, 5.54965, 5.54588 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T22:16:32Z", + "avg_ns": 12927932017, + "stddev_ns": 9770752, + "avg_ts": 9.901046, + "stddev_ts": 0.007479, + "samples_ns": [ + 12923423634, + 12921230175, + 12939142243 + ], + "samples_ts": [ + 9.9045, + 9.90618, + 9.89246 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T22:17:24Z", + "avg_ns": 23050065821, + "stddev_ns": 39375965, + "avg_ts": 5.55314, + "stddev_ts": 0.009494, + "samples_ns": [ + 23005510898, + 23064496971, + 23080189596 + ], + "samples_ts": [ + 5.56388, + 5.54965, + 5.54588 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1264 + }, + { + "timestamp_utc": "2025-12-11T22:24:07.736370+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T22:18:34Z\",\n \"avg_ns\": 12920892753,\n \"stddev_ns\": 6287031,\n \"avg_ts\": 9.906438,\n \"stddev_ts\": 0.004820,\n \"samples_ns\": [ 12924986084, 12913656252, 12924035925 ],\n \"samples_ts\": [ 9.9033, 9.91199, 9.90403 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T22:19:26Z\",\n \"avg_ns\": 93615694430,\n \"stddev_ns\": 40861437,\n \"avg_ts\": 5.469169,\n \"stddev_ts\": 0.002387,\n \"samples_ns\": [ 93591618438, 93592592372, 93662872481 ],\n \"samples_ts\": [ 5.47058, 5.47052, 5.46641 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T22:18:34Z", + "avg_ns": 12920892753, + "stddev_ns": 6287031, + "avg_ts": 9.906438, + "stddev_ts": 0.00482, + "samples_ns": [ + 12924986084, + 12913656252, + 12924035925 + ], + "samples_ts": [ + 9.9033, + 9.91199, + 9.90403 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T22:19:26Z", + "avg_ns": 93615694430, + "stddev_ns": 40861437, + "avg_ts": 5.469169, + "stddev_ts": 0.002387, + "samples_ns": [ + 93591618438, + 93592592372, + 93662872481 + ], + "samples_ts": [ + 5.47058, + 5.47052, + 5.46641 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1265 + }, + { + "timestamp_utc": "2025-12-11T22:28:46.400337+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T22:24:08Z\",\n \"avg_ns\": 52024722800,\n \"stddev_ns\": 12235991,\n \"avg_ts\": 9.841475,\n \"stddev_ts\": 0.002315,\n \"samples_ns\": [ 52010783710, 52029706448, 52033678243 ],\n \"samples_ts\": [ 9.84411, 9.84053, 9.83978 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T22:27:37Z\",\n \"avg_ns\": 23036975987,\n \"stddev_ns\": 122268671,\n \"avg_ts\": 5.556389,\n \"stddev_ts\": 0.029581,\n \"samples_ns\": [ 22895863435, 23103654035, 23111410492 ],\n \"samples_ts\": [ 5.59053, 5.54025, 5.53839 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T22:24:08Z", + "avg_ns": 52024722800, + "stddev_ns": 12235991, + "avg_ts": 9.841475, + "stddev_ts": 0.002315, + "samples_ns": [ + 52010783710, + 52029706448, + 52033678243 + ], + "samples_ts": [ + 9.84411, + 9.84053, + 9.83978 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T22:27:37Z", + "avg_ns": 23036975987, + "stddev_ns": 122268671, + "avg_ts": 5.556389, + "stddev_ts": 0.029581, + "samples_ns": [ + 22895863435, + 23103654035, + 23111410492 + ], + "samples_ts": [ + 5.59053, + 5.54025, + 5.53839 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1266 + }, + { + "timestamp_utc": "2025-12-11T22:36:57.297223+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T22:28:47Z\",\n \"avg_ns\": 52034233847,\n \"stddev_ns\": 26144456,\n \"avg_ts\": 9.839678,\n \"stddev_ts\": 0.004942,\n \"samples_ns\": [ 52064178105, 52015947106, 52022576331 ],\n \"samples_ts\": [ 9.83402, 9.84314, 9.84188 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T22:32:15Z\",\n \"avg_ns\": 93766520805,\n \"stddev_ns\": 59059935,\n \"avg_ts\": 5.460373,\n \"stddev_ts\": 0.003438,\n \"samples_ns\": [ 93740031347, 93725344768, 93834186302 ],\n \"samples_ts\": [ 5.46191, 5.46277, 5.45643 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T22:28:47Z", + "avg_ns": 52034233847, + "stddev_ns": 26144456, + "avg_ts": 9.839678, + "stddev_ts": 0.004942, + "samples_ns": [ + 52064178105, + 52015947106, + 52022576331 + ], + "samples_ts": [ + 9.83402, + 9.84314, + 9.84188 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T22:32:15Z", + "avg_ns": 93766520805, + "stddev_ns": 59059935, + "avg_ts": 5.460373, + "stddev_ts": 0.003438, + "samples_ns": [ + 93740031347, + 93725344768, + 93834186302 + ], + "samples_ts": [ + 5.46191, + 5.46277, + 5.45643 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1267 + }, + { + "timestamp_utc": "2025-12-11T22:38:59.539496+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T22:36:58Z\",\n \"avg_ns\": 12918558177,\n \"stddev_ns\": 7192802,\n \"avg_ts\": 9.908229,\n \"stddev_ts\": 0.005515,\n \"samples_ns\": [ 12926214083, 12917514270, 12911946180 ],\n \"samples_ts\": [ 9.90236, 9.90903, 9.9133 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T22:37:50Z\",\n \"avg_ns\": 23044248318,\n \"stddev_ns\": 46528010,\n \"avg_ts\": 5.554546,\n \"stddev_ts\": 0.011219,\n \"samples_ns\": [ 22995387203, 23088025098, 23049332654 ],\n \"samples_ts\": [ 5.56633, 5.544, 5.55331 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T22:36:58Z", + "avg_ns": 12918558177, + "stddev_ns": 7192802, + "avg_ts": 9.908229, + "stddev_ts": 0.005515, + "samples_ns": [ + 12926214083, + 12917514270, + 12911946180 + ], + "samples_ts": [ + 9.90236, + 9.90903, + 9.9133 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T22:37:50Z", + "avg_ns": 23044248318, + "stddev_ns": 46528010, + "avg_ts": 5.554546, + "stddev_ts": 0.011219, + "samples_ns": [ + 22995387203, + 23088025098, + 23049332654 + ], + "samples_ts": [ + 5.56633, + 5.544, + 5.55331 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1268 + }, + { + "timestamp_utc": "2025-12-11T22:44:33.796462+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T22:39:00Z\",\n \"avg_ns\": 12921536672,\n \"stddev_ns\": 4288045,\n \"avg_ts\": 9.905943,\n \"stddev_ts\": 0.003285,\n \"samples_ns\": [ 12916747659, 12922853913, 12925008446 ],\n \"samples_ts\": [ 9.90962, 9.90493, 9.90328 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T22:39:52Z\",\n \"avg_ns\": 93701171287,\n \"stddev_ns\": 41252890,\n \"avg_ts\": 5.464180,\n \"stddev_ts\": 0.002405,\n \"samples_ns\": [ 93669163014, 93686625299, 93747725549 ],\n \"samples_ts\": [ 5.46605, 5.46503, 5.46147 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T22:39:00Z", + "avg_ns": 12921536672, + "stddev_ns": 4288045, + "avg_ts": 9.905943, + "stddev_ts": 0.003285, + "samples_ns": [ + 12916747659, + 12922853913, + 12925008446 + ], + "samples_ts": [ + 9.90962, + 9.90493, + 9.90328 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T22:39:52Z", + "avg_ns": 93701171287, + "stddev_ns": 41252890, + "avg_ts": 5.46418, + "stddev_ts": 0.002405, + "samples_ns": [ + 93669163014, + 93686625299, + 93747725549 + ], + "samples_ts": [ + 5.46605, + 5.46503, + 5.46147 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1269 + }, + { + "timestamp_utc": "2025-12-11T22:49:14.001022+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T22:44:34Z\",\n \"avg_ns\": 52443491633,\n \"stddev_ns\": 12842860,\n \"avg_ts\": 9.762890,\n \"stddev_ts\": 0.002391,\n \"samples_ns\": [ 52453666864, 52447746799, 52429061236 ],\n \"samples_ts\": [ 9.761, 9.7621, 9.76558 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T22:48:04Z\",\n \"avg_ns\": 22997594039,\n \"stddev_ns\": 69619933,\n \"avg_ts\": 5.565834,\n \"stddev_ts\": 0.016869,\n \"samples_ns\": [ 22920401127, 23016752652, 23055628340 ],\n \"samples_ts\": [ 5.58454, 5.56117, 5.55179 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T22:44:34Z", + "avg_ns": 52443491633, + "stddev_ns": 12842860, + "avg_ts": 9.76289, + "stddev_ts": 0.002391, + "samples_ns": [ + 52453666864, + 52447746799, + 52429061236 + ], + "samples_ts": [ + 9.761, + 9.7621, + 9.76558 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T22:48:04Z", + "avg_ns": 22997594039, + "stddev_ns": 69619933, + "avg_ts": 5.565834, + "stddev_ts": 0.016869, + "samples_ns": [ + 22920401127, + 23016752652, + 23055628340 + ], + "samples_ts": [ + 5.58454, + 5.56117, + 5.55179 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1270 + }, + { + "timestamp_utc": "2025-12-11T22:57:25.392508+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T22:49:15Z\",\n \"avg_ns\": 52424787062,\n \"stddev_ns\": 8968956,\n \"avg_ts\": 9.766373,\n \"stddev_ts\": 0.001670,\n \"samples_ns\": [ 52424386989, 52433946436, 52416027762 ],\n \"samples_ts\": [ 9.76645, 9.76467, 9.768 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T22:52:44Z\",\n \"avg_ns\": 93407669527,\n \"stddev_ns\": 149570025,\n \"avg_ts\": 5.481358,\n \"stddev_ts\": 0.008783,\n \"samples_ns\": [ 93240629438, 93529194821, 93453184322 ],\n \"samples_ts\": [ 5.49117, 5.47423, 5.47868 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T22:49:15Z", + "avg_ns": 52424787062, + "stddev_ns": 8968956, + "avg_ts": 9.766373, + "stddev_ts": 0.00167, + "samples_ns": [ + 52424386989, + 52433946436, + 52416027762 + ], + "samples_ts": [ + 9.76645, + 9.76467, + 9.768 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T22:52:44Z", + "avg_ns": 93407669527, + "stddev_ns": 149570025, + "avg_ts": 5.481358, + "stddev_ts": 0.008783, + "samples_ns": [ + 93240629438, + 93529194821, + 93453184322 + ], + "samples_ts": [ + 5.49117, + 5.47423, + 5.47868 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1271 + }, + { + "timestamp_utc": "2025-12-11T22:59:27.684345+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T22:57:26Z\",\n \"avg_ns\": 12922456459,\n \"stddev_ns\": 3299055,\n \"avg_ts\": 9.905238,\n \"stddev_ts\": 0.002527,\n \"samples_ns\": [ 12918989791, 12922826892, 12925552695 ],\n \"samples_ts\": [ 9.9079, 9.90495, 9.90286 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T22:58:18Z\",\n \"avg_ns\": 23059445470,\n \"stddev_ns\": 13151816,\n \"avg_ts\": 5.550872,\n \"stddev_ts\": 0.003165,\n \"samples_ns\": [ 23049598546, 23074379063, 23054358803 ],\n \"samples_ts\": [ 5.55324, 5.54728, 5.5521 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T22:57:26Z", + "avg_ns": 12922456459, + "stddev_ns": 3299055, + "avg_ts": 9.905238, + "stddev_ts": 0.002527, + "samples_ns": [ + 12918989791, + 12922826892, + 12925552695 + ], + "samples_ts": [ + 9.9079, + 9.90495, + 9.90286 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T22:58:18Z", + "avg_ns": 23059445470, + "stddev_ns": 13151816, + "avg_ts": 5.550872, + "stddev_ts": 0.003165, + "samples_ns": [ + 23049598546, + 23074379063, + 23054358803 + ], + "samples_ts": [ + 5.55324, + 5.54728, + 5.5521 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1272 + }, + { + "timestamp_utc": "2025-12-11T23:05:01.543825+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T22:59:28Z\",\n \"avg_ns\": 12915229276,\n \"stddev_ns\": 7861315,\n \"avg_ts\": 9.910783,\n \"stddev_ts\": 0.006033,\n \"samples_ns\": [ 12922570138, 12916181742, 12906935949 ],\n \"samples_ts\": [ 9.90515, 9.91005, 9.91715 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T23:00:20Z\",\n \"avg_ns\": 93587346393,\n \"stddev_ns\": 38519023,\n \"avg_ts\": 5.470826,\n \"stddev_ts\": 0.002252,\n \"samples_ns\": [ 93584124829, 93550540542, 93627373809 ],\n \"samples_ts\": [ 5.47101, 5.47298, 5.46849 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T22:59:28Z", + "avg_ns": 12915229276, + "stddev_ns": 7861315, + "avg_ts": 9.910783, + "stddev_ts": 0.006033, + "samples_ns": [ + 12922570138, + 12916181742, + 12906935949 + ], + "samples_ts": [ + 9.90515, + 9.91005, + 9.91715 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T23:00:20Z", + "avg_ns": 93587346393, + "stddev_ns": 38519023, + "avg_ts": 5.470826, + "stddev_ts": 0.002252, + "samples_ns": [ + 93584124829, + 93550540542, + 93627373809 + ], + "samples_ts": [ + 5.47101, + 5.47298, + 5.46849 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1273 + }, + { + "timestamp_utc": "2025-12-11T23:09:39.169771+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T23:05:02Z\",\n \"avg_ns\": 51778719238,\n \"stddev_ns\": 15490017,\n \"avg_ts\": 9.888233,\n \"stddev_ts\": 0.002957,\n \"samples_ns\": [ 51771556227, 51796490724, 51768110765 ],\n \"samples_ts\": [ 9.8896, 9.88484, 9.89026 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T23:08:29Z\",\n \"avg_ns\": 23023335194,\n \"stddev_ns\": 28480974,\n \"avg_ts\": 5.559582,\n \"stddev_ts\": 0.006882,\n \"samples_ns\": [ 22990539954, 23041853443, 23037612186 ],\n \"samples_ts\": [ 5.56751, 5.55511, 5.55613 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T23:05:02Z", + "avg_ns": 51778719238, + "stddev_ns": 15490017, + "avg_ts": 9.888233, + "stddev_ts": 0.002957, + "samples_ns": [ + 51771556227, + 51796490724, + 51768110765 + ], + "samples_ts": [ + 9.8896, + 9.88484, + 9.89026 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T23:08:29Z", + "avg_ns": 23023335194, + "stddev_ns": 28480974, + "avg_ts": 5.559582, + "stddev_ts": 0.006882, + "samples_ns": [ + 22990539954, + 23041853443, + 23037612186 + ], + "samples_ts": [ + 5.56751, + 5.55511, + 5.55613 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1274 + }, + { + "timestamp_utc": "2025-12-11T23:17:48.096892+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T23:09:40Z\",\n \"avg_ns\": 51774613588,\n \"stddev_ns\": 3609448,\n \"avg_ts\": 9.889016,\n \"stddev_ts\": 0.000688,\n \"samples_ns\": [ 51775497392, 51777691702, 51770651671 ],\n \"samples_ts\": [ 9.88885, 9.88843, 9.88977 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T23:13:07Z\",\n \"avg_ns\": 93448746505,\n \"stddev_ns\": 77270887,\n \"avg_ts\": 5.478942,\n \"stddev_ts\": 0.004533,\n \"samples_ns\": [ 93359557302, 93495478294, 93491203921 ],\n \"samples_ts\": [ 5.48417, 5.4762, 5.47645 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T23:09:40Z", + "avg_ns": 51774613588, + "stddev_ns": 3609448, + "avg_ts": 9.889016, + "stddev_ts": 0.000688, + "samples_ns": [ + 51775497392, + 51777691702, + 51770651671 + ], + "samples_ts": [ + 9.88885, + 9.88843, + 9.88977 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T23:13:07Z", + "avg_ns": 93448746505, + "stddev_ns": 77270887, + "avg_ts": 5.478942, + "stddev_ts": 0.004533, + "samples_ns": [ + 93359557302, + 93495478294, + 93491203921 + ], + "samples_ts": [ + 5.48417, + 5.4762, + 5.47645 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1275 + }, + { + "timestamp_utc": "2025-12-11T23:19:50.221220+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T23:17:49Z\",\n \"avg_ns\": 12911270230,\n \"stddev_ns\": 3699205,\n \"avg_ts\": 9.913820,\n \"stddev_ts\": 0.002840,\n \"samples_ns\": [ 12908840271, 12915527511, 12909442908 ],\n \"samples_ts\": [ 9.91569, 9.91055, 9.91522 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T23:18:40Z\",\n \"avg_ns\": 23020667975,\n \"stddev_ns\": 29065842,\n \"avg_ts\": 5.560227,\n \"stddev_ts\": 0.007025,\n \"samples_ns\": [ 22987558719, 23041982445, 23032462761 ],\n \"samples_ts\": [ 5.56823, 5.55508, 5.55737 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T23:17:49Z", + "avg_ns": 12911270230, + "stddev_ns": 3699205, + "avg_ts": 9.91382, + "stddev_ts": 0.00284, + "samples_ns": [ + 12908840271, + 12915527511, + 12909442908 + ], + "samples_ts": [ + 9.91569, + 9.91055, + 9.91522 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T23:18:40Z", + "avg_ns": 23020667975, + "stddev_ns": 29065842, + "avg_ts": 5.560227, + "stddev_ts": 0.007025, + "samples_ns": [ + 22987558719, + 23041982445, + 23032462761 + ], + "samples_ts": [ + 5.56823, + 5.55508, + 5.55737 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1276 + }, + { + "timestamp_utc": "2025-12-11T23:25:23.758550+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T23:19:51Z\",\n \"avg_ns\": 12912258945,\n \"stddev_ns\": 4006272,\n \"avg_ts\": 9.913061,\n \"stddev_ts\": 0.003075,\n \"samples_ns\": [ 12915193024, 12913887176, 12907696636 ],\n \"samples_ts\": [ 9.91081, 9.91181, 9.91656 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T23:20:43Z\",\n \"avg_ns\": 93477694862,\n \"stddev_ns\": 55686867,\n \"avg_ts\": 5.477244,\n \"stddev_ts\": 0.003263,\n \"samples_ns\": [ 93474282165, 93535009594, 93423792827 ],\n \"samples_ts\": [ 5.47744, 5.47389, 5.4804 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T23:19:51Z", + "avg_ns": 12912258945, + "stddev_ns": 4006272, + "avg_ts": 9.913061, + "stddev_ts": 0.003075, + "samples_ns": [ + 12915193024, + 12913887176, + 12907696636 + ], + "samples_ts": [ + 9.91081, + 9.91181, + 9.91656 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T23:20:43Z", + "avg_ns": 93477694862, + "stddev_ns": 55686867, + "avg_ts": 5.477244, + "stddev_ts": 0.003263, + "samples_ns": [ + 93474282165, + 93535009594, + 93423792827 + ], + "samples_ts": [ + 5.47744, + 5.47389, + 5.4804 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1277 + }, + { + "timestamp_utc": "2025-12-11T23:30:02.486762+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T23:25:24Z\",\n \"avg_ns\": 52001708037,\n \"stddev_ns\": 6786771,\n \"avg_ts\": 9.845831,\n \"stddev_ts\": 0.001284,\n \"samples_ns\": [ 52008667567, 52001331493, 51995125053 ],\n \"samples_ts\": [ 9.84451, 9.8459, 9.84708 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T23:28:52Z\",\n \"avg_ns\": 23092597261,\n \"stddev_ns\": 12924833,\n \"avg_ts\": 5.542903,\n \"stddev_ts\": 0.003102,\n \"samples_ns\": [ 23091686499, 23080151898, 23105953386 ],\n \"samples_ts\": [ 5.54312, 5.54589, 5.5397 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T23:25:24Z", + "avg_ns": 52001708037, + "stddev_ns": 6786771, + "avg_ts": 9.845831, + "stddev_ts": 0.001284, + "samples_ns": [ + 52008667567, + 52001331493, + 51995125053 + ], + "samples_ts": [ + 9.84451, + 9.8459, + 9.84708 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T23:28:52Z", + "avg_ns": 23092597261, + "stddev_ns": 12924833, + "avg_ts": 5.542903, + "stddev_ts": 0.003102, + "samples_ns": [ + 23091686499, + 23080151898, + 23105953386 + ], + "samples_ts": [ + 5.54312, + 5.54589, + 5.5397 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1278 + }, + { + "timestamp_utc": "2025-12-11T23:38:12.473730+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T23:30:03Z\",\n \"avg_ns\": 52009210014,\n \"stddev_ns\": 5506985,\n \"avg_ts\": 9.844410,\n \"stddev_ts\": 0.001041,\n \"samples_ns\": [ 52013731346, 52003090457, 52010808241 ],\n \"samples_ts\": [ 9.84355, 9.84557, 9.84411 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T23:33:31Z\",\n \"avg_ns\": 93487464132,\n \"stddev_ns\": 42216984,\n \"avg_ts\": 5.476671,\n \"stddev_ts\": 0.002473,\n \"samples_ns\": [ 93448355220, 93481816534, 93532220642 ],\n \"samples_ts\": [ 5.47896, 5.477, 5.47405 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T23:30:03Z", + "avg_ns": 52009210014, + "stddev_ns": 5506985, + "avg_ts": 9.84441, + "stddev_ts": 0.001041, + "samples_ns": [ + 52013731346, + 52003090457, + 52010808241 + ], + "samples_ts": [ + 9.84355, + 9.84557, + 9.84411 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T23:33:31Z", + "avg_ns": 93487464132, + "stddev_ns": 42216984, + "avg_ts": 5.476671, + "stddev_ts": 0.002473, + "samples_ns": [ + 93448355220, + 93481816534, + 93532220642 + ], + "samples_ts": [ + 5.47896, + 5.477, + 5.47405 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1279 + }, + { + "timestamp_utc": "2025-12-11T23:40:14.672022+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T23:38:13Z\",\n \"avg_ns\": 12906730462,\n \"stddev_ns\": 1749252,\n \"avg_ts\": 9.917306,\n \"stddev_ts\": 0.001338,\n \"samples_ns\": [ 12908577537, 12905117500, 12906496351 ],\n \"samples_ts\": [ 9.91589, 9.91855, 9.91749 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T23:39:05Z\",\n \"avg_ns\": 23044588517,\n \"stddev_ns\": 35322006,\n \"avg_ts\": 5.554458,\n \"stddev_ts\": 0.008521,\n \"samples_ns\": [ 23003914793, 23067539602, 23062311158 ],\n \"samples_ts\": [ 5.56427, 5.54892, 5.55018 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T23:38:13Z", + "avg_ns": 12906730462, + "stddev_ns": 1749252, + "avg_ts": 9.917306, + "stddev_ts": 0.001338, + "samples_ns": [ + 12908577537, + 12905117500, + 12906496351 + ], + "samples_ts": [ + 9.91589, + 9.91855, + 9.91749 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T23:39:05Z", + "avg_ns": 23044588517, + "stddev_ns": 35322006, + "avg_ts": 5.554458, + "stddev_ts": 0.008521, + "samples_ns": [ + 23003914793, + 23067539602, + 23062311158 + ], + "samples_ts": [ + 5.56427, + 5.54892, + 5.55018 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1280 + }, + { + "timestamp_utc": "2025-12-11T23:45:48.292960+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T23:40:15Z\",\n \"avg_ns\": 12912138037,\n \"stddev_ns\": 3251647,\n \"avg_ts\": 9.913153,\n \"stddev_ts\": 0.002493,\n \"samples_ns\": [ 12911313272, 12915718576, 12909382265 ],\n \"samples_ts\": [ 9.91379, 9.9104, 9.91527 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T23:41:07Z\",\n \"avg_ns\": 93488096909,\n \"stddev_ns\": 58016239,\n \"avg_ts\": 5.476634,\n \"stddev_ts\": 0.003400,\n \"samples_ns\": [ 93422081643, 93511246274, 93530962812 ],\n \"samples_ts\": [ 5.4805, 5.47528, 5.47412 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T23:40:15Z", + "avg_ns": 12912138037, + "stddev_ns": 3251647, + "avg_ts": 9.913153, + "stddev_ts": 0.002493, + "samples_ns": [ + 12911313272, + 12915718576, + 12909382265 + ], + "samples_ts": [ + 9.91379, + 9.9104, + 9.91527 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T23:41:07Z", + "avg_ns": 93488096909, + "stddev_ns": 58016239, + "avg_ts": 5.476634, + "stddev_ts": 0.0034, + "samples_ns": [ + 93422081643, + 93511246274, + 93530962812 + ], + "samples_ts": [ + 5.4805, + 5.47528, + 5.47412 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1281 + }, + { + "timestamp_utc": "2025-12-11T23:50:28.347177+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T23:45:49Z\",\n \"avg_ns\": 52425344094,\n \"stddev_ns\": 6208984,\n \"avg_ts\": 9.766269,\n \"stddev_ts\": 0.001157,\n \"samples_ns\": [ 52418254355, 52427965284, 52429812643 ],\n \"samples_ts\": [ 9.76759, 9.76578, 9.76544 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T23:49:19Z\",\n \"avg_ns\": 22959614102,\n \"stddev_ns\": 160664203,\n \"avg_ts\": 5.575189,\n \"stddev_ts\": 0.039170,\n \"samples_ns\": [ 22774345572, 23043904564, 23060592172 ],\n \"samples_ts\": [ 5.62036, 5.55461, 5.55059 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T23:45:49Z", + "avg_ns": 52425344094, + "stddev_ns": 6208984, + "avg_ts": 9.766269, + "stddev_ts": 0.001157, + "samples_ns": [ + 52418254355, + 52427965284, + 52429812643 + ], + "samples_ts": [ + 9.76759, + 9.76578, + 9.76544 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T23:49:19Z", + "avg_ns": 22959614102, + "stddev_ns": 160664203, + "avg_ts": 5.575189, + "stddev_ts": 0.03917, + "samples_ns": [ + 22774345572, + 23043904564, + 23060592172 + ], + "samples_ts": [ + 5.62036, + 5.55461, + 5.55059 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1282 + }, + { + "timestamp_utc": "2025-12-11T23:58:39.439417+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T23:50:29Z\",\n \"avg_ns\": 52410911796,\n \"stddev_ns\": 3733453,\n \"avg_ts\": 9.768958,\n \"stddev_ts\": 0.000696,\n \"samples_ns\": [ 52411031518, 52414583949, 52407119921 ],\n \"samples_ts\": [ 9.76894, 9.76827, 9.76966 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T23:53:59Z\",\n \"avg_ns\": 93320661471,\n \"stddev_ns\": 51744120,\n \"avg_ts\": 5.486460,\n \"stddev_ts\": 0.003043,\n \"samples_ns\": [ 93261238953, 93355765677, 93344979784 ],\n \"samples_ts\": [ 5.48995, 5.4844, 5.48503 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T23:50:29Z", + "avg_ns": 52410911796, + "stddev_ns": 3733453, + "avg_ts": 9.768958, + "stddev_ts": 0.000696, + "samples_ns": [ + 52411031518, + 52414583949, + 52407119921 + ], + "samples_ts": [ + 9.76894, + 9.76827, + 9.76966 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-11T23:53:59Z", + "avg_ns": 93320661471, + "stddev_ns": 51744120, + "avg_ts": 5.48646, + "stddev_ts": 0.003043, + "samples_ns": [ + 93261238953, + 93355765677, + 93344979784 + ], + "samples_ts": [ + 5.48995, + 5.4844, + 5.48503 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1283 + }, + { + "timestamp_utc": "2025-12-12T00:00:41.922339+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T23:58:40Z\",\n \"avg_ns\": 12912498217,\n \"stddev_ns\": 2739622,\n \"avg_ts\": 9.912877,\n \"stddev_ts\": 0.002100,\n \"samples_ns\": [ 12912328643, 12909852046, 12915313964 ],\n \"samples_ts\": [ 9.91301, 9.91491, 9.91072 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-11T23:59:32Z\",\n \"avg_ns\": 23133574753,\n \"stddev_ns\": 141184699,\n \"avg_ts\": 5.533221,\n \"stddev_ts\": 0.033882,\n \"samples_ns\": [ 22971606464, 23230615812, 23198501983 ],\n \"samples_ts\": [ 5.5721, 5.50997, 5.5176 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-11T23:58:40Z", + "avg_ns": 12912498217, + "stddev_ns": 2739622, + "avg_ts": 9.912877, + "stddev_ts": 0.0021, + "samples_ns": [ + 12912328643, + 12909852046, + 12915313964 + ], + "samples_ts": [ + 9.91301, + 9.91491, + 9.91072 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-11T23:59:32Z", + "avg_ns": 23133574753, + "stddev_ns": 141184699, + "avg_ts": 5.533221, + "stddev_ts": 0.033882, + "samples_ns": [ + 22971606464, + 23230615812, + 23198501983 + ], + "samples_ts": [ + 5.5721, + 5.50997, + 5.5176 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1284 + }, + { + "timestamp_utc": "2025-12-12T00:06:15.150172+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T00:00:43Z\",\n \"avg_ns\": 12908287217,\n \"stddev_ns\": 3539307,\n \"avg_ts\": 9.916111,\n \"stddev_ts\": 0.002716,\n \"samples_ns\": [ 12906444187, 12906053846, 12912363620 ],\n \"samples_ts\": [ 9.91753, 9.91783, 9.91298 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T00:01:34Z\",\n \"avg_ns\": 93356457802,\n \"stddev_ns\": 87997164,\n \"avg_ts\": 5.484359,\n \"stddev_ts\": 0.005170,\n \"samples_ns\": [ 93265923754, 93441674843, 93361774811 ],\n \"samples_ts\": [ 5.48968, 5.47935, 5.48404 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-12T00:00:43Z", + "avg_ns": 12908287217, + "stddev_ns": 3539307, + "avg_ts": 9.916111, + "stddev_ts": 0.002716, + "samples_ns": [ + 12906444187, + 12906053846, + 12912363620 + ], + "samples_ts": [ + 9.91753, + 9.91783, + 9.91298 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-12T00:01:34Z", + "avg_ns": 93356457802, + "stddev_ns": 87997164, + "avg_ts": 5.484359, + "stddev_ts": 0.00517, + "samples_ns": [ + 93265923754, + 93441674843, + 93361774811 + ], + "samples_ts": [ + 5.48968, + 5.47935, + 5.48404 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1285 + }, + { + "timestamp_utc": "2025-12-12T00:10:52.532473+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T00:06:16Z\",\n \"avg_ns\": 51760556523,\n \"stddev_ns\": 7142204,\n \"avg_ts\": 9.891702,\n \"stddev_ts\": 0.001364,\n \"samples_ns\": [ 51765512482, 51763778231, 51752378858 ],\n \"samples_ts\": [ 9.89075, 9.89109, 9.89327 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T00:09:43Z\",\n \"avg_ns\": 22962740552,\n \"stddev_ns\": 183153515,\n \"avg_ts\": 5.574485,\n \"stddev_ts\": 0.044668,\n \"samples_ns\": [ 22751259819, 23069942090, 23067019747 ],\n \"samples_ts\": [ 5.62606, 5.54835, 5.54905 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-12T00:06:16Z", + "avg_ns": 51760556523, + "stddev_ns": 7142204, + "avg_ts": 9.891702, + "stddev_ts": 0.001364, + "samples_ns": [ + 51765512482, + 51763778231, + 51752378858 + ], + "samples_ts": [ + 9.89075, + 9.89109, + 9.89327 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-12T00:09:43Z", + "avg_ns": 22962740552, + "stddev_ns": 183153515, + "avg_ts": 5.574485, + "stddev_ts": 0.044668, + "samples_ns": [ + 22751259819, + 23069942090, + 23067019747 + ], + "samples_ts": [ + 5.62606, + 5.54835, + 5.54905 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1286 + }, + { + "timestamp_utc": "2025-12-12T00:19:01.187108+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T00:10:53Z\",\n \"avg_ns\": 51772636114,\n \"stddev_ns\": 8040199,\n \"avg_ts\": 9.889394,\n \"stddev_ts\": 0.001535,\n \"samples_ns\": [ 51771728192, 51781088500, 51765091651 ],\n \"samples_ts\": [ 9.88957, 9.88778, 9.89084 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T00:14:20Z\",\n \"avg_ns\": 93378477082,\n \"stddev_ns\": 177282177,\n \"avg_ts\": 5.483075,\n \"stddev_ts\": 0.010419,\n \"samples_ns\": [ 93178204593, 93515317115, 93441909538 ],\n \"samples_ts\": [ 5.49485, 5.47504, 5.47934 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-12T00:10:53Z", + "avg_ns": 51772636114, + "stddev_ns": 8040199, + "avg_ts": 9.889394, + "stddev_ts": 0.001535, + "samples_ns": [ + 51771728192, + 51781088500, + 51765091651 + ], + "samples_ts": [ + 9.88957, + 9.88778, + 9.89084 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-12T00:14:20Z", + "avg_ns": 93378477082, + "stddev_ns": 177282177, + "avg_ts": 5.483075, + "stddev_ts": 0.010419, + "samples_ns": [ + 93178204593, + 93515317115, + 93441909538 + ], + "samples_ts": [ + 5.49485, + 5.47504, + 5.47934 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1287 + }, + { + "timestamp_utc": "2025-12-12T00:21:03.388860+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T00:19:02Z\",\n \"avg_ns\": 12910479338,\n \"stddev_ns\": 927424,\n \"avg_ts\": 9.914427,\n \"stddev_ts\": 0.000707,\n \"samples_ns\": [ 12910357953, 12911454446, 12909625616 ],\n \"samples_ts\": [ 9.91452, 9.91368, 9.91508 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T00:19:53Z\",\n \"avg_ns\": 23041193366,\n \"stddev_ns\": 68847071,\n \"avg_ts\": 5.555301,\n \"stddev_ts\": 0.016617,\n \"samples_ns\": [ 22965161629, 23099316458, 23059102011 ],\n \"samples_ts\": [ 5.57366, 5.54129, 5.55095 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-12T00:19:02Z", + "avg_ns": 12910479338, + "stddev_ns": 927424, + "avg_ts": 9.914427, + "stddev_ts": 0.000707, + "samples_ns": [ + 12910357953, + 12911454446, + 12909625616 + ], + "samples_ts": [ + 9.91452, + 9.91368, + 9.91508 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-12T00:19:53Z", + "avg_ns": 23041193366, + "stddev_ns": 68847071, + "avg_ts": 5.555301, + "stddev_ts": 0.016617, + "samples_ns": [ + 22965161629, + 23099316458, + 23059102011 + ], + "samples_ts": [ + 5.57366, + 5.54129, + 5.55095 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1288 + }, + { + "timestamp_utc": "2025-12-12T00:26:36.978607+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T00:21:04Z\",\n \"avg_ns\": 12906433601,\n \"stddev_ns\": 2375919,\n \"avg_ts\": 9.917535,\n \"stddev_ts\": 0.001826,\n \"samples_ns\": [ 12908470922, 12907006162, 12903823719 ],\n \"samples_ts\": [ 9.91597, 9.91709, 9.91954 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T00:21:56Z\",\n \"avg_ns\": 93466468682,\n \"stddev_ns\": 79555233,\n \"avg_ts\": 5.477903,\n \"stddev_ts\": 0.004663,\n \"samples_ns\": [ 93381592016, 93478476787, 93539337243 ],\n \"samples_ts\": [ 5.48288, 5.4772, 5.47363 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-12T00:21:04Z", + "avg_ns": 12906433601, + "stddev_ns": 2375919, + "avg_ts": 9.917535, + "stddev_ts": 0.001826, + "samples_ns": [ + 12908470922, + 12907006162, + 12903823719 + ], + "samples_ts": [ + 9.91597, + 9.91709, + 9.91954 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-12T00:21:56Z", + "avg_ns": 93466468682, + "stddev_ns": 79555233, + "avg_ts": 5.477903, + "stddev_ts": 0.004663, + "samples_ns": [ + 93381592016, + 93478476787, + 93539337243 + ], + "samples_ts": [ + 5.48288, + 5.4772, + 5.47363 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1289 + }, + { + "timestamp_utc": "2025-12-12T00:31:15.255031+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T00:26:38Z\",\n \"avg_ns\": 52009434889,\n \"stddev_ns\": 6410168,\n \"avg_ts\": 9.844368,\n \"stddev_ts\": 0.001213,\n \"samples_ns\": [ 52010572590, 52002536151, 52015195927 ],\n \"samples_ts\": [ 9.84415, 9.84567, 9.84328 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T00:30:06Z\",\n \"avg_ns\": 22927065547,\n \"stddev_ns\": 166771785,\n \"avg_ts\": 5.583119,\n \"stddev_ts\": 0.040782,\n \"samples_ns\": [ 22734634469, 23016914087, 23029648085 ],\n \"samples_ts\": [ 5.63018, 5.56113, 5.55805 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-12T00:26:38Z", + "avg_ns": 52009434889, + "stddev_ns": 6410168, + "avg_ts": 9.844368, + "stddev_ts": 0.001213, + "samples_ns": [ + 52010572590, + 52002536151, + 52015195927 + ], + "samples_ts": [ + 9.84415, + 9.84567, + 9.84328 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-12T00:30:06Z", + "avg_ns": 22927065547, + "stddev_ns": 166771785, + "avg_ts": 5.583119, + "stddev_ts": 0.040782, + "samples_ns": [ + 22734634469, + 23016914087, + 23029648085 + ], + "samples_ts": [ + 5.63018, + 5.56113, + 5.55805 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1290 + }, + { + "timestamp_utc": "2025-12-12T00:39:24.831784+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T00:31:16Z\",\n \"avg_ns\": 52011282095,\n \"stddev_ns\": 3112193,\n \"avg_ts\": 9.844018,\n \"stddev_ts\": 0.000587,\n \"samples_ns\": [ 52007840288, 52012137431, 52013868567 ],\n \"samples_ts\": [ 9.84467, 9.84386, 9.84353 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T00:34:44Z\",\n \"avg_ns\": 93359953117,\n \"stddev_ns\": 201419623,\n \"avg_ts\": 5.484167,\n \"stddev_ts\": 0.011846,\n \"samples_ns\": [ 93127972562, 93461502555, 93490384235 ],\n \"samples_ts\": [ 5.49781, 5.47819, 5.4765 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-12T00:31:16Z", + "avg_ns": 52011282095, + "stddev_ns": 3112193, + "avg_ts": 9.844018, + "stddev_ts": 0.000587, + "samples_ns": [ + 52007840288, + 52012137431, + 52013868567 + ], + "samples_ts": [ + 9.84467, + 9.84386, + 9.84353 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-12T00:34:44Z", + "avg_ns": 93359953117, + "stddev_ns": 201419623, + "avg_ts": 5.484167, + "stddev_ts": 0.011846, + "samples_ns": [ + 93127972562, + 93461502555, + 93490384235 + ], + "samples_ts": [ + 5.49781, + 5.47819, + 5.4765 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1291 + }, + { + "timestamp_utc": "2025-12-12T00:41:27.098676+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T00:39:26Z\",\n \"avg_ns\": 12912186184,\n \"stddev_ns\": 5597015,\n \"avg_ts\": 9.913117,\n \"stddev_ts\": 0.004296,\n \"samples_ns\": [ 12907422175, 12910786027, 12918350350 ],\n \"samples_ts\": [ 9.91677, 9.91419, 9.90839 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T00:40:17Z\",\n \"avg_ns\": 23048535822,\n \"stddev_ns\": 48236278,\n \"avg_ts\": 5.553514,\n \"stddev_ts\": 0.011633,\n \"samples_ns\": [ 22994279458, 23086567106, 23064760904 ],\n \"samples_ts\": [ 5.5666, 5.54435, 5.54959 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-12T00:39:26Z", + "avg_ns": 12912186184, + "stddev_ns": 5597015, + "avg_ts": 9.913117, + "stddev_ts": 0.004296, + "samples_ns": [ + 12907422175, + 12910786027, + 12918350350 + ], + "samples_ts": [ + 9.91677, + 9.91419, + 9.90839 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-12T00:40:17Z", + "avg_ns": 23048535822, + "stddev_ns": 48236278, + "avg_ts": 5.553514, + "stddev_ts": 0.011633, + "samples_ns": [ + 22994279458, + 23086567106, + 23064760904 + ], + "samples_ts": [ + 5.5666, + 5.54435, + 5.54959 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1292 + }, + { + "timestamp_utc": "2025-12-12T00:47:00.584021+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T00:41:28Z\",\n \"avg_ns\": 12917913485,\n \"stddev_ns\": 1200238,\n \"avg_ts\": 9.908721,\n \"stddev_ts\": 0.000912,\n \"samples_ns\": [ 12919259158, 12917002720, 12917478579 ],\n \"samples_ts\": [ 9.90769, 9.90942, 9.90905 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T00:42:19Z\",\n \"avg_ns\": 93455191906,\n \"stddev_ns\": 40726575,\n \"avg_ts\": 5.478562,\n \"stddev_ts\": 0.002388,\n \"samples_ns\": [ 93473817004, 93408484307, 93483274408 ],\n \"samples_ts\": [ 5.47747, 5.4813, 5.47692 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-12T00:41:28Z", + "avg_ns": 12917913485, + "stddev_ns": 1200238, + "avg_ts": 9.908721, + "stddev_ts": 0.000912, + "samples_ns": [ + 12919259158, + 12917002720, + 12917478579 + ], + "samples_ts": [ + 9.90769, + 9.90942, + 9.90905 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-12T00:42:19Z", + "avg_ns": 93455191906, + "stddev_ns": 40726575, + "avg_ts": 5.478562, + "stddev_ts": 0.002388, + "samples_ns": [ + 93473817004, + 93408484307, + 93483274408 + ], + "samples_ts": [ + 5.47747, + 5.4813, + 5.47692 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1293 + }, + { + "timestamp_utc": "2025-12-12T00:51:40.854569+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T00:47:01Z\",\n \"avg_ns\": 52417618966,\n \"stddev_ns\": 5552454,\n \"avg_ts\": 9.767708,\n \"stddev_ts\": 0.001033,\n \"samples_ns\": [ 52423705353, 52412860593, 52416290954 ],\n \"samples_ts\": [ 9.76657, 9.76859, 9.76796 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T00:50:31Z\",\n \"avg_ns\": 23047143990,\n \"stddev_ns\": 79157330,\n \"avg_ts\": 5.553877,\n \"stddev_ts\": 0.019080,\n \"samples_ns\": [ 22966251462, 23124443465, 23050737044 ],\n \"samples_ts\": [ 5.5734, 5.53527, 5.55297 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-12T00:47:01Z", + "avg_ns": 52417618966, + "stddev_ns": 5552454, + "avg_ts": 9.767708, + "stddev_ts": 0.001033, + "samples_ns": [ + 52423705353, + 52412860593, + 52416290954 + ], + "samples_ts": [ + 9.76657, + 9.76859, + 9.76796 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-12T00:50:31Z", + "avg_ns": 23047143990, + "stddev_ns": 79157330, + "avg_ts": 5.553877, + "stddev_ts": 0.01908, + "samples_ns": [ + 22966251462, + 23124443465, + 23050737044 + ], + "samples_ts": [ + 5.5734, + 5.53527, + 5.55297 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1294 + }, + { + "timestamp_utc": "2025-12-12T00:59:52.173752+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T00:51:42Z\",\n \"avg_ns\": 52414692240,\n \"stddev_ns\": 2998771,\n \"avg_ts\": 9.768254,\n \"stddev_ts\": 0.000556,\n \"samples_ns\": [ 52413535420, 52418078520, 52412462782 ],\n \"samples_ts\": [ 9.76847, 9.76762, 9.76867 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 4B Q2_K - Medium\",\n \"model_size\": 1722623232,\n \"model_n_params\": 3880263168,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T00:55:11Z\",\n \"avg_ns\": 93398392887,\n \"stddev_ns\": 71287126,\n \"avg_ts\": 5.481895,\n \"stddev_ts\": 0.004184,\n \"samples_ns\": [ 93329690407, 93472010288, 93393477966 ],\n \"samples_ts\": [ 5.48593, 5.47758, 5.48218 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-12T00:51:42Z", + "avg_ns": 52414692240, + "stddev_ns": 2998771, + "avg_ts": 9.768254, + "stddev_ts": 0.000556, + "samples_ns": [ + 52413535420, + 52418078520, + 52412462782 + ], + "samples_ts": [ + 9.76847, + 9.76762, + 9.76867 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_type": "gemma3 4B Q2_K - Medium", + "model_size": 1722623232, + "model_n_params": 3880263168, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-12T00:55:11Z", + "avg_ns": 93398392887, + "stddev_ns": 71287126, + "avg_ts": 5.481895, + "stddev_ts": 0.004184, + "samples_ns": [ + 93329690407, + 93472010288, + 93393477966 + ], + "samples_ts": [ + 5.48593, + 5.47758, + 5.48218 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-4B-it-GGUF/gemma-3-4b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-4B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1295 + }, + { + "timestamp_utc": "2025-12-12T01:21:46.864238+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T01:00:47Z\",\n \"avg_ns\": 168043614257,\n \"stddev_ns\": 30836230,\n \"avg_ts\": 0.761707,\n \"stddev_ts\": 0.000140,\n \"samples_ns\": [ 168079220355, 168025645585, 168025976831 ],\n \"samples_ts\": [ 0.761546, 0.761788, 0.761787 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T01:12:05Z\",\n \"avg_ns\": 193343911175,\n \"stddev_ns\": 12375716,\n \"avg_ts\": 0.662033,\n \"stddev_ts\": 0.000042,\n \"samples_ns\": [ 193357965035, 193339082379, 193334686112 ],\n \"samples_ts\": [ 0.661985, 0.662049, 0.662064 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-12T01:00:47Z", + "avg_ns": 168043614257, + "stddev_ns": 30836230, + "avg_ts": 0.761707, + "stddev_ts": 0.00014, + "samples_ns": [ + 168079220355, + 168025645585, + 168025976831 + ], + "samples_ts": [ + 0.761546, + 0.761788, + 0.761787 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-12T01:12:05Z", + "avg_ns": 193343911175, + "stddev_ns": 12375716, + "avg_ts": 0.662033, + "stddev_ts": 4.2e-05, + "samples_ns": [ + 193357965035, + 193339082379, + 193334686112 + ], + "samples_ts": [ + 0.661985, + 0.662049, + 0.662064 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1296 + }, + { + "timestamp_utc": "2025-12-12T02:11:59.141352+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T01:21:48Z\",\n \"avg_ns\": 168007756394,\n \"stddev_ns\": 2586732,\n \"avg_ts\": 0.761870,\n \"stddev_ts\": 0.000012,\n \"samples_ns\": [ 168010641784, 168006841973, 168005785426 ],\n \"samples_ts\": [ 0.761857, 0.761874, 0.761879 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T01:33:01Z\",\n \"avg_ns\": 778787046868,\n \"stddev_ns\": 17893857,\n \"avg_ts\": 0.657433,\n \"stddev_ts\": 0.000015,\n \"samples_ns\": [ 778807274381, 778780583580, 778773282643 ],\n \"samples_ts\": [ 0.657416, 0.657438, 0.657444 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-12T01:21:48Z", + "avg_ns": 168007756394, + "stddev_ns": 2586732, + "avg_ts": 0.76187, + "stddev_ts": 1.2e-05, + "samples_ns": [ + 168010641784, + 168006841973, + 168005785426 + ], + "samples_ts": [ + 0.761857, + 0.761874, + 0.761879 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-12T01:33:01Z", + "avg_ns": 778787046868, + "stddev_ns": 17893857, + "avg_ts": 0.657433, + "stddev_ts": 1.5e-05, + "samples_ns": [ + 778807274381, + 778780583580, + 778773282643 + ], + "samples_ts": [ + 0.657416, + 0.657438, + 0.657444 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1297 + }, + { + "timestamp_utc": "2025-12-12T03:06:38.071200+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T02:12:00Z\",\n \"avg_ns\": 673976699550,\n \"stddev_ns\": 4667636,\n \"avg_ts\": 0.759670,\n \"stddev_ts\": 0.000005,\n \"samples_ns\": [ 673981996528, 673974314447, 673973787676 ],\n \"samples_ts\": [ 0.759664, 0.759673, 0.759673 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T02:56:56Z\",\n \"avg_ns\": 193375093066,\n \"stddev_ns\": 6685959,\n \"avg_ts\": 0.661926,\n \"stddev_ts\": 0.000023,\n \"samples_ns\": [ 193382758001, 193370593315, 193371927883 ],\n \"samples_ts\": [ 0.6619, 0.661941, 0.661937 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-12T02:12:00Z", + "avg_ns": 673976699550, + "stddev_ns": 4667636, + "avg_ts": 0.75967, + "stddev_ts": 5e-06, + "samples_ns": [ + 673981996528, + 673974314447, + 673973787676 + ], + "samples_ts": [ + 0.759664, + 0.759673, + 0.759673 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-12T02:56:56Z", + "avg_ns": 193375093066, + "stddev_ns": 6685959, + "avg_ts": 0.661926, + "stddev_ts": 2.3e-05, + "samples_ns": [ + 193382758001, + 193370593315, + 193371927883 + ], + "samples_ts": [ + 0.6619, + 0.661941, + 0.661937 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1298 + }, + { + "timestamp_utc": "2025-12-12T04:30:36.418079+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T03:06:39Z\",\n \"avg_ns\": 673995201446,\n \"stddev_ns\": 1803020,\n \"avg_ts\": 0.759649,\n \"stddev_ts\": 0.000002,\n \"samples_ns\": [ 673994082592, 673994779031, 673996742717 ],\n \"samples_ts\": [ 0.759651, 0.75965, 0.759648 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T03:51:35Z\",\n \"avg_ns\": 779814365617,\n \"stddev_ns\": 7234088,\n \"avg_ts\": 0.656567,\n \"stddev_ts\": 0.000006,\n \"samples_ns\": [ 779822415852, 779812271128, 779808409871 ],\n \"samples_ts\": [ 0.65656, 0.656568, 0.656572 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-12T03:06:39Z", + "avg_ns": 673995201446, + "stddev_ns": 1803020, + "avg_ts": 0.759649, + "stddev_ts": 2e-06, + "samples_ns": [ + 673994082592, + 673994779031, + 673996742717 + ], + "samples_ts": [ + 0.759651, + 0.75965, + 0.759648 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-12T03:51:35Z", + "avg_ns": 779814365617, + "stddev_ns": 7234088, + "avg_ts": 0.656567, + "stddev_ts": 6e-06, + "samples_ns": [ + 779822415852, + 779812271128, + 779808409871 + ], + "samples_ts": [ + 0.65656, + 0.656568, + 0.656572 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1299 + }, + { + "timestamp_utc": "2025-12-12T04:51:31.215579+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T04:30:37Z\",\n \"avg_ns\": 168048325900,\n \"stddev_ns\": 1345367,\n \"avg_ts\": 0.761686,\n \"stddev_ts\": 0.000006,\n \"samples_ns\": [ 168047773990, 168049859456, 168047344254 ],\n \"samples_ts\": [ 0.761688, 0.761679, 0.76169 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T04:41:49Z\",\n \"avg_ns\": 193244626913,\n \"stddev_ns\": 8365531,\n \"avg_ts\": 0.662373,\n \"stddev_ts\": 0.000029,\n \"samples_ns\": [ 193254111958, 193238427998, 193241340785 ],\n \"samples_ts\": [ 0.66234, 0.662394, 0.662384 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-12T04:30:37Z", + "avg_ns": 168048325900, + "stddev_ns": 1345367, + "avg_ts": 0.761686, + "stddev_ts": 6e-06, + "samples_ns": [ + 168047773990, + 168049859456, + 168047344254 + ], + "samples_ts": [ + 0.761688, + 0.761679, + 0.76169 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-12T04:41:49Z", + "avg_ns": 193244626913, + "stddev_ns": 8365531, + "avg_ts": 0.662373, + "stddev_ts": 2.9e-05, + "samples_ns": [ + 193254111958, + 193238427998, + 193241340785 + ], + "samples_ts": [ + 0.66234, + 0.662394, + 0.662384 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1300 + }, + { + "timestamp_utc": "2025-12-12T05:41:45.222387+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T04:51:32Z\",\n \"avg_ns\": 168054228135,\n \"stddev_ns\": 1553772,\n \"avg_ts\": 0.761659,\n \"stddev_ts\": 0.000007,\n \"samples_ns\": [ 168054375662, 168052605861, 168055702882 ],\n \"samples_ts\": [ 0.761658, 0.761666, 0.761652 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T05:02:44Z\",\n \"avg_ns\": 779629168357,\n \"stddev_ns\": 7628248,\n \"avg_ts\": 0.656722,\n \"stddev_ts\": 0.000006,\n \"samples_ns\": [ 779637473245, 779627557920, 779622473906 ],\n \"samples_ts\": [ 0.656715, 0.656724, 0.656728 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-12T04:51:32Z", + "avg_ns": 168054228135, + "stddev_ns": 1553772, + "avg_ts": 0.761659, + "stddev_ts": 7e-06, + "samples_ns": [ + 168054375662, + 168052605861, + 168055702882 + ], + "samples_ts": [ + 0.761658, + 0.761666, + 0.761652 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-12T05:02:44Z", + "avg_ns": 779629168357, + "stddev_ns": 7628248, + "avg_ts": 0.656722, + "stddev_ts": 6e-06, + "samples_ns": [ + 779637473245, + 779627557920, + 779622473906 + ], + "samples_ts": [ + 0.656715, + 0.656724, + 0.656728 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1301 + }, + { + "timestamp_utc": "2025-12-12T06:36:25.521219+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T05:41:46Z\",\n \"avg_ns\": 674412872623,\n \"stddev_ns\": 6391049,\n \"avg_ts\": 0.759179,\n \"stddev_ts\": 0.000007,\n \"samples_ns\": [ 674419621778, 674412083039, 674406913052 ],\n \"samples_ts\": [ 0.759171, 0.75918, 0.759186 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T06:26:44Z\",\n \"avg_ns\": 193254805574,\n \"stddev_ns\": 11345277,\n \"avg_ts\": 0.662338,\n \"stddev_ts\": 0.000039,\n \"samples_ns\": [ 193267877911, 193247671433, 193248867379 ],\n \"samples_ts\": [ 0.662293, 0.662362, 0.662358 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-12T05:41:46Z", + "avg_ns": 674412872623, + "stddev_ns": 6391049, + "avg_ts": 0.759179, + "stddev_ts": 7e-06, + "samples_ns": [ + 674419621778, + 674412083039, + 674406913052 + ], + "samples_ts": [ + 0.759171, + 0.75918, + 0.759186 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-12T06:26:44Z", + "avg_ns": 193254805574, + "stddev_ns": 11345277, + "avg_ts": 0.662338, + "stddev_ts": 3.9e-05, + "samples_ns": [ + 193267877911, + 193247671433, + 193248867379 + ], + "samples_ts": [ + 0.662293, + 0.662362, + 0.662358 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1302 + }, + { + "timestamp_utc": "2025-12-12T08:00:25.051432+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T06:36:26Z\",\n \"avg_ns\": 674324385610,\n \"stddev_ns\": 2058287,\n \"avg_ts\": 0.759278,\n \"stddev_ts\": 0.000002,\n \"samples_ns\": [ 674324977305, 674322096281, 674326083244 ],\n \"samples_ts\": [ 0.759278, 0.759281, 0.759277 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T07:21:24Z\",\n \"avg_ns\": 779782497211,\n \"stddev_ns\": 4796891,\n \"avg_ts\": 0.656593,\n \"stddev_ts\": 0.000004,\n \"samples_ns\": [ 779787989752, 779780370752, 779779131129 ],\n \"samples_ts\": [ 0.656589, 0.656595, 0.656596 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-12T06:36:26Z", + "avg_ns": 674324385610, + "stddev_ns": 2058287, + "avg_ts": 0.759278, + "stddev_ts": 2e-06, + "samples_ns": [ + 674324977305, + 674322096281, + 674326083244 + ], + "samples_ts": [ + 0.759278, + 0.759281, + 0.759277 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-12T07:21:24Z", + "avg_ns": 779782497211, + "stddev_ns": 4796891, + "avg_ts": 0.656593, + "stddev_ts": 4e-06, + "samples_ns": [ + 779787989752, + 779780370752, + 779779131129 + ], + "samples_ts": [ + 0.656589, + 0.656595, + 0.656596 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1303 + }, + { + "timestamp_utc": "2025-12-12T08:21:20.182065+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T08:00:26Z\",\n \"avg_ns\": 168033600395,\n \"stddev_ns\": 1003315,\n \"avg_ts\": 0.761752,\n \"stddev_ts\": 0.000005,\n \"samples_ns\": [ 168034753915, 168033116837, 168032930433 ],\n \"samples_ts\": [ 0.761747, 0.761755, 0.761755 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T08:11:38Z\",\n \"avg_ns\": 193383523000,\n \"stddev_ns\": 543875,\n \"avg_ts\": 0.661897,\n \"stddev_ts\": 0.000001,\n \"samples_ns\": [ 193383892008, 193383321416, 193383355577 ],\n \"samples_ts\": [ 0.661896, 0.661898, 0.661898 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-12T08:00:26Z", + "avg_ns": 168033600395, + "stddev_ns": 1003315, + "avg_ts": 0.761752, + "stddev_ts": 5e-06, + "samples_ns": [ + 168034753915, + 168033116837, + 168032930433 + ], + "samples_ts": [ + 0.761747, + 0.761755, + 0.761755 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-12T08:11:38Z", + "avg_ns": 193383523000, + "stddev_ns": 543875, + "avg_ts": 0.661897, + "stddev_ts": 1e-06, + "samples_ns": [ + 193383892008, + 193383321416, + 193383355577 + ], + "samples_ts": [ + 0.661896, + 0.661898, + 0.661898 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1304 + }, + { + "timestamp_utc": "2025-12-12T09:11:31.998380+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T08:21:21Z\",\n \"avg_ns\": 168030291025,\n \"stddev_ns\": 776264,\n \"avg_ts\": 0.761767,\n \"stddev_ts\": 0.000002,\n \"samples_ns\": [ 168030463137, 168029710694, 168030699246 ],\n \"samples_ts\": [ 0.761767, 0.76177, 0.761766 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T08:32:33Z\",\n \"avg_ns\": 778923627411,\n \"stddev_ns\": 5274745,\n \"avg_ts\": 0.657317,\n \"stddev_ts\": 0.000004,\n \"samples_ns\": [ 778929708492, 778920289759, 778920883982 ],\n \"samples_ts\": [ 0.657312, 0.65732, 0.65732 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-12T08:21:21Z", + "avg_ns": 168030291025, + "stddev_ns": 776264, + "avg_ts": 0.761767, + "stddev_ts": 2e-06, + "samples_ns": [ + 168030463137, + 168029710694, + 168030699246 + ], + "samples_ts": [ + 0.761767, + 0.76177, + 0.761766 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-12T08:32:33Z", + "avg_ns": 778923627411, + "stddev_ns": 5274745, + "avg_ts": 0.657317, + "stddev_ts": 4e-06, + "samples_ns": [ + 778929708492, + 778920289759, + 778920883982 + ], + "samples_ts": [ + 0.657312, + 0.65732, + 0.65732 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1305 + }, + { + "timestamp_utc": "2025-12-12T10:06:22.390590+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T09:11:33Z\",\n \"avg_ns\": 676792339486,\n \"stddev_ns\": 4222218,\n \"avg_ts\": 0.756510,\n \"stddev_ts\": 0.000005,\n \"samples_ns\": [ 676795530217, 676793716995, 676787771248 ],\n \"samples_ts\": [ 0.756506, 0.756508, 0.756515 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T09:56:40Z\",\n \"avg_ns\": 193427947422,\n \"stddev_ns\": 16227723,\n \"avg_ts\": 0.661745,\n \"stddev_ts\": 0.000055,\n \"samples_ns\": [ 193445853097, 193423737186, 193414251985 ],\n \"samples_ts\": [ 0.661684, 0.66176, 0.661792 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-12T09:11:33Z", + "avg_ns": 676792339486, + "stddev_ns": 4222218, + "avg_ts": 0.75651, + "stddev_ts": 5e-06, + "samples_ns": [ + 676795530217, + 676793716995, + 676787771248 + ], + "samples_ts": [ + 0.756506, + 0.756508, + 0.756515 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-12T09:56:40Z", + "avg_ns": 193427947422, + "stddev_ns": 16227723, + "avg_ts": 0.661745, + "stddev_ts": 5.5e-05, + "samples_ns": [ + 193445853097, + 193423737186, + 193414251985 + ], + "samples_ts": [ + 0.661684, + 0.66176, + 0.661792 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1306 + }, + { + "timestamp_utc": "2025-12-12T11:30:32.162622+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T10:06:23Z\",\n \"avg_ns\": 676728337540,\n \"stddev_ns\": 4124352,\n \"avg_ts\": 0.756581,\n \"stddev_ts\": 0.000005,\n \"samples_ns\": [ 676724332001, 676728109385, 676732571234 ],\n \"samples_ts\": [ 0.756586, 0.756582, 0.756577 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T10:51:31Z\",\n \"avg_ns\": 779633624814,\n \"stddev_ns\": 14605597,\n \"avg_ts\": 0.656719,\n \"stddev_ts\": 0.000012,\n \"samples_ns\": [ 779650373867, 779626715134, 779623785442 ],\n \"samples_ts\": [ 0.656705, 0.656725, 0.656727 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-12T10:06:23Z", + "avg_ns": 676728337540, + "stddev_ns": 4124352, + "avg_ts": 0.756581, + "stddev_ts": 5e-06, + "samples_ns": [ + 676724332001, + 676728109385, + 676732571234 + ], + "samples_ts": [ + 0.756586, + 0.756582, + 0.756577 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-12T10:51:31Z", + "avg_ns": 779633624814, + "stddev_ns": 14605597, + "avg_ts": 0.656719, + "stddev_ts": 1.2e-05, + "samples_ns": [ + 779650373867, + 779626715134, + 779623785442 + ], + "samples_ts": [ + 0.656705, + 0.656725, + 0.656727 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1307 + }, + { + "timestamp_utc": "2025-12-12T11:51:27.344433+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T11:30:33Z\",\n \"avg_ns\": 168023353303,\n \"stddev_ns\": 1521475,\n \"avg_ts\": 0.761799,\n \"stddev_ts\": 0.000007,\n \"samples_ns\": [ 168021647487, 168024570261, 168023842161 ],\n \"samples_ts\": [ 0.761807, 0.761793, 0.761797 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T11:41:45Z\",\n \"avg_ns\": 193389761207,\n \"stddev_ns\": 3576632,\n \"avg_ts\": 0.661876,\n \"stddev_ts\": 0.000012,\n \"samples_ns\": [ 193393807013, 193388306199, 193387170410 ],\n \"samples_ts\": [ 0.661862, 0.661881, 0.661885 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-12T11:30:33Z", + "avg_ns": 168023353303, + "stddev_ns": 1521475, + "avg_ts": 0.761799, + "stddev_ts": 7e-06, + "samples_ns": [ + 168021647487, + 168024570261, + 168023842161 + ], + "samples_ts": [ + 0.761807, + 0.761793, + 0.761797 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-12T11:41:45Z", + "avg_ns": 193389761207, + "stddev_ns": 3576632, + "avg_ts": 0.661876, + "stddev_ts": 1.2e-05, + "samples_ns": [ + 193393807013, + 193388306199, + 193387170410 + ], + "samples_ts": [ + 0.661862, + 0.661881, + 0.661885 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1308 + }, + { + "timestamp_utc": "2025-12-12T12:41:41.536933+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T11:51:28Z\",\n \"avg_ns\": 168053705304,\n \"stddev_ns\": 549684,\n \"avg_ts\": 0.761661,\n \"stddev_ts\": 0.000002,\n \"samples_ns\": [ 168053352431, 168053679975, 168054083507 ],\n \"samples_ts\": [ 0.761663, 0.761661, 0.76166 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T12:02:40Z\",\n \"avg_ns\": 779687846758,\n \"stddev_ns\": 11085028,\n \"avg_ts\": 0.656673,\n \"stddev_ts\": 0.000009,\n \"samples_ns\": [ 779700311957, 779683801075, 779679427244 ],\n \"samples_ts\": [ 0.656663, 0.656676, 0.65668 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-12T11:51:28Z", + "avg_ns": 168053705304, + "stddev_ns": 549684, + "avg_ts": 0.761661, + "stddev_ts": 2e-06, + "samples_ns": [ + 168053352431, + 168053679975, + 168054083507 + ], + "samples_ts": [ + 0.761663, + 0.761661, + 0.76166 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-12T12:02:40Z", + "avg_ns": 779687846758, + "stddev_ns": 11085028, + "avg_ts": 0.656673, + "stddev_ts": 9e-06, + "samples_ns": [ + 779700311957, + 779683801075, + 779679427244 + ], + "samples_ts": [ + 0.656663, + 0.656676, + 0.65668 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1309 + }, + { + "timestamp_utc": "2025-12-12T13:36:20.735797+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T12:41:42Z\",\n \"avg_ns\": 674183600693,\n \"stddev_ns\": 11016353,\n \"avg_ts\": 0.759437,\n \"stddev_ts\": 0.000012,\n \"samples_ns\": [ 674171889634, 674185314968, 674193597479 ],\n \"samples_ts\": [ 0.75945, 0.759435, 0.759426 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T13:26:39Z\",\n \"avg_ns\": 193183105356,\n \"stddev_ns\": 3289573,\n \"avg_ts\": 0.662584,\n \"stddev_ts\": 0.000011,\n \"samples_ns\": [ 193186660187, 193182306367, 193180349516 ],\n \"samples_ts\": [ 0.662572, 0.662587, 0.662593 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-12T12:41:42Z", + "avg_ns": 674183600693, + "stddev_ns": 11016353, + "avg_ts": 0.759437, + "stddev_ts": 1.2e-05, + "samples_ns": [ + 674171889634, + 674185314968, + 674193597479 + ], + "samples_ts": [ + 0.75945, + 0.759435, + 0.759426 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-12T13:26:39Z", + "avg_ns": 193183105356, + "stddev_ns": 3289573, + "avg_ts": 0.662584, + "stddev_ts": 1.1e-05, + "samples_ns": [ + 193186660187, + 193182306367, + 193180349516 + ], + "samples_ts": [ + 0.662572, + 0.662587, + 0.662593 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1310 + }, + { + "timestamp_utc": "2025-12-12T15:00:17.486725+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T13:36:21Z\",\n \"avg_ns\": 673986951734,\n \"stddev_ns\": 4346193,\n \"avg_ts\": 0.759659,\n \"stddev_ts\": 0.000005,\n \"samples_ns\": [ 673982390801, 673990624982, 673987839421 ],\n \"samples_ts\": [ 0.759664, 0.759654, 0.759658 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T14:21:17Z\",\n \"avg_ns\": 779293294372,\n \"stddev_ns\": 6354442,\n \"avg_ts\": 0.657006,\n \"stddev_ts\": 0.000005,\n \"samples_ns\": [ 779300005039, 779292508892, 779287369185 ],\n \"samples_ts\": [ 0.657, 0.657006, 0.657011 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-12T13:36:21Z", + "avg_ns": 673986951734, + "stddev_ns": 4346193, + "avg_ts": 0.759659, + "stddev_ts": 5e-06, + "samples_ns": [ + 673982390801, + 673990624982, + 673987839421 + ], + "samples_ts": [ + 0.759664, + 0.759654, + 0.759658 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-12T14:21:17Z", + "avg_ns": 779293294372, + "stddev_ns": 6354442, + "avg_ts": 0.657006, + "stddev_ts": 5e-06, + "samples_ns": [ + 779300005039, + 779292508892, + 779287369185 + ], + "samples_ts": [ + 0.657, + 0.657006, + 0.657011 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1311 + }, + { + "timestamp_utc": "2025-12-12T15:21:12.788544+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T15:00:18Z\",\n \"avg_ns\": 168023891576,\n \"stddev_ns\": 1334792,\n \"avg_ts\": 0.761796,\n \"stddev_ts\": 0.000006,\n \"samples_ns\": [ 168022486833, 168024228339, 168024959557 ],\n \"samples_ts\": [ 0.761803, 0.761795, 0.761792 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T15:11:31Z\",\n \"avg_ns\": 193363517394,\n \"stddev_ns\": 7302843,\n \"avg_ts\": 0.661966,\n \"stddev_ts\": 0.000025,\n \"samples_ns\": [ 193371831717, 193360408487, 193358311980 ],\n \"samples_ts\": [ 0.661937, 0.661976, 0.661983 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-12T15:00:18Z", + "avg_ns": 168023891576, + "stddev_ns": 1334792, + "avg_ts": 0.761796, + "stddev_ts": 6e-06, + "samples_ns": [ + 168022486833, + 168024228339, + 168024959557 + ], + "samples_ts": [ + 0.761803, + 0.761795, + 0.761792 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-12T15:11:31Z", + "avg_ns": 193363517394, + "stddev_ns": 7302843, + "avg_ts": 0.661966, + "stddev_ts": 2.5e-05, + "samples_ns": [ + 193371831717, + 193360408487, + 193358311980 + ], + "samples_ts": [ + 0.661937, + 0.661976, + 0.661983 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1312 + }, + { + "timestamp_utc": "2025-12-12T16:11:26.649816+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T15:21:14Z\",\n \"avg_ns\": 168019548686,\n \"stddev_ns\": 1358757,\n \"avg_ts\": 0.761816,\n \"stddev_ts\": 0.000006,\n \"samples_ns\": [ 168019401302, 168020911530, 168018333227 ],\n \"samples_ts\": [ 0.761817, 0.76181, 0.761822 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T15:32:26Z\",\n \"avg_ns\": 779611251499,\n \"stddev_ns\": 10459364,\n \"avg_ts\": 0.656738,\n \"stddev_ts\": 0.000009,\n \"samples_ns\": [ 779621282794, 779611925222, 779600546483 ],\n \"samples_ts\": [ 0.656729, 0.656737, 0.656747 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-12T15:21:14Z", + "avg_ns": 168019548686, + "stddev_ns": 1358757, + "avg_ts": 0.761816, + "stddev_ts": 6e-06, + "samples_ns": [ + 168019401302, + 168020911530, + 168018333227 + ], + "samples_ts": [ + 0.761817, + 0.76181, + 0.761822 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-12T15:32:26Z", + "avg_ns": 779611251499, + "stddev_ns": 10459364, + "avg_ts": 0.656738, + "stddev_ts": 9e-06, + "samples_ns": [ + 779621282794, + 779611925222, + 779600546483 + ], + "samples_ts": [ + 0.656729, + 0.656737, + 0.656747 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1313 + }, + { + "timestamp_utc": "2025-12-12T17:06:06.654035+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T16:11:27Z\",\n \"avg_ns\": 674307838364,\n \"stddev_ns\": 8101791,\n \"avg_ts\": 0.759297,\n \"stddev_ts\": 0.000009,\n \"samples_ns\": [ 674298730583, 674310951198, 674313833313 ],\n \"samples_ts\": [ 0.759307, 0.759294, 0.75929 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T16:56:25Z\",\n \"avg_ns\": 193294210027,\n \"stddev_ns\": 11521281,\n \"avg_ts\": 0.662203,\n \"stddev_ts\": 0.000039,\n \"samples_ns\": [ 193286263913, 193307423451, 193288942717 ],\n \"samples_ts\": [ 0.66223, 0.662158, 0.662221 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-12T16:11:27Z", + "avg_ns": 674307838364, + "stddev_ns": 8101791, + "avg_ts": 0.759297, + "stddev_ts": 9e-06, + "samples_ns": [ + 674298730583, + 674310951198, + 674313833313 + ], + "samples_ts": [ + 0.759307, + 0.759294, + 0.75929 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-12T16:56:25Z", + "avg_ns": 193294210027, + "stddev_ns": 11521281, + "avg_ts": 0.662203, + "stddev_ts": 3.9e-05, + "samples_ns": [ + 193286263913, + 193307423451, + 193288942717 + ], + "samples_ts": [ + 0.66223, + 0.662158, + 0.662221 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1314 + }, + { + "timestamp_utc": "2025-12-12T18:30:07.104887+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T17:06:07Z\",\n \"avg_ns\": 674310786681,\n \"stddev_ns\": 5988787,\n \"avg_ts\": 0.759294,\n \"stddev_ts\": 0.000007,\n \"samples_ns\": [ 674317635866, 674307450250, 674307273928 ],\n \"samples_ts\": [ 0.759286, 0.759298, 0.759298 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T17:51:05Z\",\n \"avg_ns\": 780073574930,\n \"stddev_ns\": 20896128,\n \"avg_ts\": 0.656348,\n \"stddev_ts\": 0.000018,\n \"samples_ns\": [ 780091587197, 780078416423, 780050721172 ],\n \"samples_ts\": [ 0.656333, 0.656344, 0.656368 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-12T17:06:07Z", + "avg_ns": 674310786681, + "stddev_ns": 5988787, + "avg_ts": 0.759294, + "stddev_ts": 7e-06, + "samples_ns": [ + 674317635866, + 674307450250, + 674307273928 + ], + "samples_ts": [ + 0.759286, + 0.759298, + 0.759298 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-12T17:51:05Z", + "avg_ns": 780073574930, + "stddev_ns": 20896128, + "avg_ts": 0.656348, + "stddev_ts": 1.8e-05, + "samples_ns": [ + 780091587197, + 780078416423, + 780050721172 + ], + "samples_ts": [ + 0.656333, + 0.656344, + 0.656368 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1315 + }, + { + "timestamp_utc": "2025-12-12T18:51:02.734796+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T18:30:08Z\",\n \"avg_ns\": 168031532161,\n \"stddev_ns\": 1130590,\n \"avg_ts\": 0.761762,\n \"stddev_ts\": 0.000005,\n \"samples_ns\": [ 168030427017, 168032686586, 168031482880 ],\n \"samples_ts\": [ 0.761767, 0.761757, 0.761762 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T18:41:20Z\",\n \"avg_ns\": 193536270140,\n \"stddev_ns\": 998830,\n \"avg_ts\": 0.661375,\n \"stddev_ts\": 0.000003,\n \"samples_ns\": [ 193536017109, 193535527112, 193537266200 ],\n \"samples_ts\": [ 0.661376, 0.661377, 0.661371 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-12T18:30:08Z", + "avg_ns": 168031532161, + "stddev_ns": 1130590, + "avg_ts": 0.761762, + "stddev_ts": 5e-06, + "samples_ns": [ + 168030427017, + 168032686586, + 168031482880 + ], + "samples_ts": [ + 0.761767, + 0.761757, + 0.761762 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-12T18:41:20Z", + "avg_ns": 193536270140, + "stddev_ns": 998830, + "avg_ts": 0.661375, + "stddev_ts": 3e-06, + "samples_ns": [ + 193536017109, + 193535527112, + 193537266200 + ], + "samples_ts": [ + 0.661376, + 0.661377, + 0.661371 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1316 + }, + { + "timestamp_utc": "2025-12-12T19:41:17.214206+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T18:51:03Z\",\n \"avg_ns\": 168039502625,\n \"stddev_ns\": 744979,\n \"avg_ts\": 0.761726,\n \"stddev_ts\": 0.000003,\n \"samples_ns\": [ 168038994633, 168040196408, 168039316835 ],\n \"samples_ts\": [ 0.761728, 0.761723, 0.761726 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T19:02:16Z\",\n \"avg_ns\": 779804587676,\n \"stddev_ns\": 33435099,\n \"avg_ts\": 0.656575,\n \"stddev_ts\": 0.000028,\n \"samples_ns\": [ 779771288686, 779804340160, 779838134183 ],\n \"samples_ts\": [ 0.656603, 0.656575, 0.656547 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-12T18:51:03Z", + "avg_ns": 168039502625, + "stddev_ns": 744979, + "avg_ts": 0.761726, + "stddev_ts": 3e-06, + "samples_ns": [ + 168038994633, + 168040196408, + 168039316835 + ], + "samples_ts": [ + 0.761728, + 0.761723, + 0.761726 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-12T19:02:16Z", + "avg_ns": 779804587676, + "stddev_ns": 33435099, + "avg_ts": 0.656575, + "stddev_ts": 2.8e-05, + "samples_ns": [ + 779771288686, + 779804340160, + 779838134183 + ], + "samples_ts": [ + 0.656603, + 0.656575, + 0.656547 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1317 + }, + { + "timestamp_utc": "2025-12-12T20:36:06.867688+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T19:41:18Z\",\n \"avg_ns\": 676776002853,\n \"stddev_ns\": 7338399,\n \"avg_ts\": 0.756528,\n \"stddev_ts\": 0.000008,\n \"samples_ns\": [ 676768954164, 676783516222, 676775538174 ],\n \"samples_ts\": [ 0.756536, 0.75652, 0.756529 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T20:26:25Z\",\n \"avg_ns\": 193204537758,\n \"stddev_ns\": 8962273,\n \"avg_ts\": 0.662510,\n \"stddev_ts\": 0.000031,\n \"samples_ns\": [ 193214774854, 193198182060, 193200656361 ],\n \"samples_ts\": [ 0.662475, 0.662532, 0.662524 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-12T19:41:18Z", + "avg_ns": 676776002853, + "stddev_ns": 7338399, + "avg_ts": 0.756528, + "stddev_ts": 8e-06, + "samples_ns": [ + 676768954164, + 676783516222, + 676775538174 + ], + "samples_ts": [ + 0.756536, + 0.75652, + 0.756529 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-12T20:26:25Z", + "avg_ns": 193204537758, + "stddev_ns": 8962273, + "avg_ts": 0.66251, + "stddev_ts": 3.1e-05, + "samples_ns": [ + 193214774854, + 193198182060, + 193200656361 + ], + "samples_ts": [ + 0.662475, + 0.662532, + 0.662524 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1318 + }, + { + "timestamp_utc": "2025-12-12T22:00:15.370573+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T20:36:08Z\",\n \"avg_ns\": 676883679723,\n \"stddev_ns\": 5679612,\n \"avg_ts\": 0.756408,\n \"stddev_ts\": 0.000006,\n \"samples_ns\": [ 676878222090, 676883368508, 676889448572 ],\n \"samples_ts\": [ 0.756414, 0.756408, 0.756401 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T21:21:15Z\",\n \"avg_ns\": 779342256750,\n \"stddev_ns\": 18544489,\n \"avg_ts\": 0.656964,\n \"stddev_ts\": 0.000016,\n \"samples_ns\": [ 779363267472, 779335330532, 779328172246 ],\n \"samples_ts\": [ 0.656947, 0.65697, 0.656976 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-12T20:36:08Z", + "avg_ns": 676883679723, + "stddev_ns": 5679612, + "avg_ts": 0.756408, + "stddev_ts": 6e-06, + "samples_ns": [ + 676878222090, + 676883368508, + 676889448572 + ], + "samples_ts": [ + 0.756414, + 0.756408, + 0.756401 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-12T21:21:15Z", + "avg_ns": 779342256750, + "stddev_ns": 18544489, + "avg_ts": 0.656964, + "stddev_ts": 1.6e-05, + "samples_ns": [ + 779363267472, + 779335330532, + 779328172246 + ], + "samples_ts": [ + 0.656947, + 0.65697, + 0.656976 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1319 + }, + { + "timestamp_utc": "2025-12-12T22:21:10.783644+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T22:00:16Z\",\n \"avg_ns\": 168055579157,\n \"stddev_ns\": 1146692,\n \"avg_ts\": 0.761653,\n \"stddev_ts\": 0.000004,\n \"samples_ns\": [ 168055652594, 168056529733, 168054555146 ],\n \"samples_ts\": [ 0.761652, 0.761648, 0.761657 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T22:11:28Z\",\n \"avg_ns\": 193437092946,\n \"stddev_ns\": 1389001,\n \"avg_ts\": 0.661714,\n \"stddev_ts\": 0.000005,\n \"samples_ns\": [ 193438675868, 193436525304, 193436077666 ],\n \"samples_ts\": [ 0.661708, 0.661716, 0.661717 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-12T22:00:16Z", + "avg_ns": 168055579157, + "stddev_ns": 1146692, + "avg_ts": 0.761653, + "stddev_ts": 4e-06, + "samples_ns": [ + 168055652594, + 168056529733, + 168054555146 + ], + "samples_ts": [ + 0.761652, + 0.761648, + 0.761657 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-12T22:11:28Z", + "avg_ns": 193437092946, + "stddev_ns": 1389001, + "avg_ts": 0.661714, + "stddev_ts": 5e-06, + "samples_ns": [ + 193438675868, + 193436525304, + 193436077666 + ], + "samples_ts": [ + 0.661708, + 0.661716, + 0.661717 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1320 + }, + { + "timestamp_utc": "2025-12-12T23:11:24.893638+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T22:21:12Z\",\n \"avg_ns\": 168028047007,\n \"stddev_ns\": 600287,\n \"avg_ts\": 0.761778,\n \"stddev_ts\": 0.000001,\n \"samples_ns\": [ 168028099499, 168028169836, 168027871688 ],\n \"samples_ts\": [ 0.761777, 0.761777, 0.761778 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T22:32:24Z\",\n \"avg_ns\": 779681896156,\n \"stddev_ns\": 24965942,\n \"avg_ts\": 0.656678,\n \"stddev_ts\": 0.000021,\n \"samples_ns\": [ 779710632411, 779669080347, 779665975712 ],\n \"samples_ts\": [ 0.656654, 0.656689, 0.656691 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-12T22:21:12Z", + "avg_ns": 168028047007, + "stddev_ns": 600287, + "avg_ts": 0.761778, + "stddev_ts": 1e-06, + "samples_ns": [ + 168028099499, + 168028169836, + 168027871688 + ], + "samples_ts": [ + 0.761777, + 0.761777, + 0.761778 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-12T22:32:24Z", + "avg_ns": 779681896156, + "stddev_ns": 24965942, + "avg_ts": 0.656678, + "stddev_ts": 2.1e-05, + "samples_ns": [ + 779710632411, + 779669080347, + 779665975712 + ], + "samples_ts": [ + 0.656654, + 0.656689, + 0.656691 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1321 + }, + { + "timestamp_utc": "2025-12-13T00:06:03.724292+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T23:11:26Z\",\n \"avg_ns\": 673953893562,\n \"stddev_ns\": 3102095,\n \"avg_ts\": 0.759696,\n \"stddev_ts\": 0.000003,\n \"samples_ns\": [ 673955919378, 673950600931, 673955160379 ],\n \"samples_ts\": [ 0.759694, 0.7597, 0.759694 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-12T23:56:21Z\",\n \"avg_ns\": 193368985761,\n \"stddev_ns\": 5205963,\n \"avg_ts\": 0.661947,\n \"stddev_ts\": 0.000018,\n \"samples_ns\": [ 193372903818, 193370974982, 193363078483 ],\n \"samples_ts\": [ 0.661933, 0.66194, 0.661967 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-12T23:11:26Z", + "avg_ns": 673953893562, + "stddev_ns": 3102095, + "avg_ts": 0.759696, + "stddev_ts": 3e-06, + "samples_ns": [ + 673955919378, + 673950600931, + 673955160379 + ], + "samples_ts": [ + 0.759694, + 0.7597, + 0.759694 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-12T23:56:21Z", + "avg_ns": 193368985761, + "stddev_ns": 5205963, + "avg_ts": 0.661947, + "stddev_ts": 1.8e-05, + "samples_ns": [ + 193372903818, + 193370974982, + 193363078483 + ], + "samples_ts": [ + 0.661933, + 0.66194, + 0.661967 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1322 + }, + { + "timestamp_utc": "2025-12-13T01:30:01.228939+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T00:06:04Z\",\n \"avg_ns\": 673982947258,\n \"stddev_ns\": 2544227,\n \"avg_ts\": 0.759663,\n \"stddev_ts\": 0.000003,\n \"samples_ns\": [ 673984412055, 673980009441, 673984420278 ],\n \"samples_ts\": [ 0.759661, 0.759666, 0.759661 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T00:51:00Z\",\n \"avg_ns\": 779535930430,\n \"stddev_ns\": 2228063,\n \"avg_ts\": 0.656801,\n \"stddev_ts\": 0.000002,\n \"samples_ns\": [ 779534968337, 779538279768, 779534543186 ],\n \"samples_ts\": [ 0.656802, 0.656799, 0.656802 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-13T00:06:04Z", + "avg_ns": 673982947258, + "stddev_ns": 2544227, + "avg_ts": 0.759663, + "stddev_ts": 3e-06, + "samples_ns": [ + 673984412055, + 673980009441, + 673984420278 + ], + "samples_ts": [ + 0.759661, + 0.759666, + 0.759661 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-13T00:51:00Z", + "avg_ns": 779535930430, + "stddev_ns": 2228063, + "avg_ts": 0.656801, + "stddev_ts": 2e-06, + "samples_ns": [ + 779534968337, + 779538279768, + 779534543186 + ], + "samples_ts": [ + 0.656802, + 0.656799, + 0.656802 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1323 + }, + { + "timestamp_utc": "2025-12-13T01:50:55.857716+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T01:30:02Z\",\n \"avg_ns\": 168015556952,\n \"stddev_ns\": 1474123,\n \"avg_ts\": 0.761834,\n \"stddev_ts\": 0.000007,\n \"samples_ns\": [ 168017253863, 168014592699, 168014824294 ],\n \"samples_ts\": [ 0.761827, 0.761839, 0.761838 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T01:41:14Z\",\n \"avg_ns\": 193227216790,\n \"stddev_ns\": 3204510,\n \"avg_ts\": 0.662433,\n \"stddev_ts\": 0.000011,\n \"samples_ns\": [ 193223611741, 193228446276, 193229592354 ],\n \"samples_ts\": [ 0.662445, 0.662428, 0.662424 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-13T01:30:02Z", + "avg_ns": 168015556952, + "stddev_ns": 1474123, + "avg_ts": 0.761834, + "stddev_ts": 7e-06, + "samples_ns": [ + 168017253863, + 168014592699, + 168014824294 + ], + "samples_ts": [ + 0.761827, + 0.761839, + 0.761838 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-13T01:41:14Z", + "avg_ns": 193227216790, + "stddev_ns": 3204510, + "avg_ts": 0.662433, + "stddev_ts": 1.1e-05, + "samples_ns": [ + 193223611741, + 193228446276, + 193229592354 + ], + "samples_ts": [ + 0.662445, + 0.662428, + 0.662424 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1324 + }, + { + "timestamp_utc": "2025-12-13T02:41:09.457429+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T01:50:57Z\",\n \"avg_ns\": 168022011609,\n \"stddev_ns\": 1076172,\n \"avg_ts\": 0.761805,\n \"stddev_ts\": 0.000005,\n \"samples_ns\": [ 168020868007, 168022487208, 168022679613 ],\n \"samples_ts\": [ 0.76181, 0.761803, 0.761802 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T02:02:09Z\",\n \"avg_ns\": 779533047288,\n \"stddev_ns\": 15055328,\n \"avg_ts\": 0.656803,\n \"stddev_ts\": 0.000013,\n \"samples_ns\": [ 779549556373, 779529341532, 779520243961 ],\n \"samples_ts\": [ 0.65679, 0.656807, 0.656814 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-13T01:50:57Z", + "avg_ns": 168022011609, + "stddev_ns": 1076172, + "avg_ts": 0.761805, + "stddev_ts": 5e-06, + "samples_ns": [ + 168020868007, + 168022487208, + 168022679613 + ], + "samples_ts": [ + 0.76181, + 0.761803, + 0.761802 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-13T02:02:09Z", + "avg_ns": 779533047288, + "stddev_ns": 15055328, + "avg_ts": 0.656803, + "stddev_ts": 1.3e-05, + "samples_ns": [ + 779549556373, + 779529341532, + 779520243961 + ], + "samples_ts": [ + 0.65679, + 0.656807, + 0.656814 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1325 + }, + { + "timestamp_utc": "2025-12-13T03:35:49.516409+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T02:41:10Z\",\n \"avg_ns\": 674296542488,\n \"stddev_ns\": 2147184,\n \"avg_ts\": 0.759310,\n \"stddev_ts\": 0.000002,\n \"samples_ns\": [ 674297906415, 674297226756, 674294494295 ],\n \"samples_ts\": [ 0.759308, 0.759309, 0.759312 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T03:26:07Z\",\n \"avg_ns\": 193315878233,\n \"stddev_ns\": 5150994,\n \"avg_ts\": 0.662129,\n \"stddev_ts\": 0.000018,\n \"samples_ns\": [ 193321763102, 193312331097, 193313540501 ],\n \"samples_ts\": [ 0.662109, 0.662141, 0.662137 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-13T02:41:10Z", + "avg_ns": 674296542488, + "stddev_ns": 2147184, + "avg_ts": 0.75931, + "stddev_ts": 2e-06, + "samples_ns": [ + 674297906415, + 674297226756, + 674294494295 + ], + "samples_ts": [ + 0.759308, + 0.759309, + 0.759312 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-13T03:26:07Z", + "avg_ns": 193315878233, + "stddev_ns": 5150994, + "avg_ts": 0.662129, + "stddev_ts": 1.8e-05, + "samples_ns": [ + 193321763102, + 193312331097, + 193313540501 + ], + "samples_ts": [ + 0.662109, + 0.662141, + 0.662137 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1326 + }, + { + "timestamp_utc": "2025-12-13T04:59:47.192416+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T03:35:50Z\",\n \"avg_ns\": 674286673891,\n \"stddev_ns\": 4488874,\n \"avg_ts\": 0.759321,\n \"stddev_ts\": 0.000005,\n \"samples_ns\": [ 674286025316, 674282620940, 674291375418 ],\n \"samples_ts\": [ 0.759322, 0.759326, 0.759316 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T04:20:47Z\",\n \"avg_ns\": 779192846415,\n \"stddev_ns\": 5955329,\n \"avg_ts\": 0.657090,\n \"stddev_ts\": 0.000005,\n \"samples_ns\": [ 779198427046, 779193421984, 779186690216 ],\n \"samples_ts\": [ 0.657086, 0.65709, 0.657095 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-13T03:35:50Z", + "avg_ns": 674286673891, + "stddev_ns": 4488874, + "avg_ts": 0.759321, + "stddev_ts": 5e-06, + "samples_ns": [ + 674286025316, + 674282620940, + 674291375418 + ], + "samples_ts": [ + 0.759322, + 0.759326, + 0.759316 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-13T04:20:47Z", + "avg_ns": 779192846415, + "stddev_ns": 5955329, + "avg_ts": 0.65709, + "stddev_ts": 5e-06, + "samples_ns": [ + 779198427046, + 779193421984, + 779186690216 + ], + "samples_ts": [ + 0.657086, + 0.65709, + 0.657095 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1327 + }, + { + "timestamp_utc": "2025-12-13T05:20:42.334593+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T04:59:48Z\",\n \"avg_ns\": 168037124575,\n \"stddev_ns\": 588014,\n \"avg_ts\": 0.761736,\n \"stddev_ts\": 0.000003,\n \"samples_ns\": [ 168037774841, 168036630248, 168036968636 ],\n \"samples_ts\": [ 0.761733, 0.761739, 0.761737 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T05:11:00Z\",\n \"avg_ns\": 193371094742,\n \"stddev_ns\": 7729272,\n \"avg_ts\": 0.661940,\n \"stddev_ts\": 0.000026,\n \"samples_ns\": [ 193379943863, 193367675890, 193365664473 ],\n \"samples_ts\": [ 0.661909, 0.661951, 0.661958 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-13T04:59:48Z", + "avg_ns": 168037124575, + "stddev_ns": 588014, + "avg_ts": 0.761736, + "stddev_ts": 3e-06, + "samples_ns": [ + 168037774841, + 168036630248, + 168036968636 + ], + "samples_ts": [ + 0.761733, + 0.761739, + 0.761737 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-13T05:11:00Z", + "avg_ns": 193371094742, + "stddev_ns": 7729272, + "avg_ts": 0.66194, + "stddev_ts": 2.6e-05, + "samples_ns": [ + 193379943863, + 193367675890, + 193365664473 + ], + "samples_ts": [ + 0.661909, + 0.661951, + 0.661958 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1328 + }, + { + "timestamp_utc": "2025-12-13T06:10:56.779634+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T05:20:43Z\",\n \"avg_ns\": 168040690359,\n \"stddev_ns\": 1098581,\n \"avg_ts\": 0.761720,\n \"stddev_ts\": 0.000005,\n \"samples_ns\": [ 168041599262, 168039588403, 168040883413 ],\n \"samples_ts\": [ 0.761716, 0.761725, 0.761719 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T05:31:55Z\",\n \"avg_ns\": 779790553788,\n \"stddev_ns\": 4384080,\n \"avg_ts\": 0.656587,\n \"stddev_ts\": 0.000004,\n \"samples_ns\": [ 779793107515, 779785491555, 779793062294 ],\n \"samples_ts\": [ 0.656584, 0.656591, 0.656584 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-13T05:20:43Z", + "avg_ns": 168040690359, + "stddev_ns": 1098581, + "avg_ts": 0.76172, + "stddev_ts": 5e-06, + "samples_ns": [ + 168041599262, + 168039588403, + 168040883413 + ], + "samples_ts": [ + 0.761716, + 0.761725, + 0.761719 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-13T05:31:55Z", + "avg_ns": 779790553788, + "stddev_ns": 4384080, + "avg_ts": 0.656587, + "stddev_ts": 4e-06, + "samples_ns": [ + 779793107515, + 779785491555, + 779793062294 + ], + "samples_ts": [ + 0.656584, + 0.656591, + 0.656584 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1329 + }, + { + "timestamp_utc": "2025-12-13T07:05:46.225198+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T06:10:58Z\",\n \"avg_ns\": 676716084061,\n \"stddev_ns\": 17880072,\n \"avg_ts\": 0.756595,\n \"stddev_ts\": 0.000020,\n \"samples_ns\": [ 676722856960, 676729588226, 676695806997 ],\n \"samples_ts\": [ 0.756587, 0.75658, 0.756618 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T06:56:04Z\",\n \"avg_ns\": 193215427157,\n \"stddev_ns\": 6202083,\n \"avg_ts\": 0.662473,\n \"stddev_ts\": 0.000021,\n \"samples_ns\": [ 193222564721, 193211605374, 193212111377 ],\n \"samples_ts\": [ 0.662449, 0.662486, 0.662484 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-13T06:10:58Z", + "avg_ns": 676716084061, + "stddev_ns": 17880072, + "avg_ts": 0.756595, + "stddev_ts": 2e-05, + "samples_ns": [ + 676722856960, + 676729588226, + 676695806997 + ], + "samples_ts": [ + 0.756587, + 0.75658, + 0.756618 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-13T06:56:04Z", + "avg_ns": 193215427157, + "stddev_ns": 6202083, + "avg_ts": 0.662473, + "stddev_ts": 2.1e-05, + "samples_ns": [ + 193222564721, + 193211605374, + 193212111377 + ], + "samples_ts": [ + 0.662449, + 0.662486, + 0.662484 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1330 + }, + { + "timestamp_utc": "2025-12-13T08:29:54.262511+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "1", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T07:05:47Z\",\n \"avg_ns\": 676759878721,\n \"stddev_ns\": 2583177,\n \"avg_ts\": 0.756546,\n \"stddev_ts\": 0.000003,\n \"samples_ns\": [ 676758183116, 676762851729, 676758601318 ],\n \"samples_ts\": [ 0.756548, 0.756543, 0.756547 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 1,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T07:50:54Z\",\n \"avg_ns\": 779349086770,\n \"stddev_ns\": 54940115,\n \"avg_ts\": 0.656958,\n \"stddev_ts\": 0.000046,\n \"samples_ns\": [ 779412477126, 779315420789, 779319362396 ],\n \"samples_ts\": [ 0.656905, 0.656987, 0.656984 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-13T07:05:47Z", + "avg_ns": 676759878721, + "stddev_ns": 2583177, + "avg_ts": 0.756546, + "stddev_ts": 3e-06, + "samples_ns": [ + 676758183116, + 676762851729, + 676758601318 + ], + "samples_ts": [ + 0.756548, + 0.756543, + 0.756547 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 1, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-13T07:50:54Z", + "avg_ns": 779349086770, + "stddev_ns": 54940115, + "avg_ts": 0.656958, + "stddev_ts": 4.6e-05, + "samples_ns": [ + 779412477126, + 779315420789, + 779319362396 + ], + "samples_ts": [ + 0.656905, + 0.656987, + 0.656984 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 1, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1331 + }, + { + "timestamp_utc": "2025-12-13T08:40:39.636426+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T08:29:55Z\",\n \"avg_ns\": 85131239553,\n \"stddev_ns\": 2438518,\n \"avg_ts\": 1.503561,\n \"stddev_ts\": 0.000042,\n \"samples_ns\": [ 85133482891, 85128703075, 85131532695 ],\n \"samples_ts\": [ 1.50352, 1.50361, 1.50356 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T08:35:35Z\",\n \"avg_ns\": 100918349893,\n \"stddev_ns\": 68863766,\n \"avg_ts\": 1.268352,\n \"stddev_ts\": 0.000865,\n \"samples_ns\": [ 100997824407, 100876361074, 100880864198 ],\n \"samples_ts\": [ 1.26735, 1.26888, 1.26882 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-13T08:29:55Z", + "avg_ns": 85131239553, + "stddev_ns": 2438518, + "avg_ts": 1.503561, + "stddev_ts": 4.2e-05, + "samples_ns": [ + 85133482891, + 85128703075, + 85131532695 + ], + "samples_ts": [ + 1.50352, + 1.50361, + 1.50356 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-13T08:35:35Z", + "avg_ns": 100918349893, + "stddev_ns": 68863766, + "avg_ts": 1.268352, + "stddev_ts": 0.000865, + "samples_ns": [ + 100997824407, + 100876361074, + 100880864198 + ], + "samples_ts": [ + 1.26735, + 1.26888, + 1.26882 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1332 + }, + { + "timestamp_utc": "2025-12-13T09:06:39.801911+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T08:40:40Z\",\n \"avg_ns\": 84164730480,\n \"stddev_ns\": 2638895,\n \"avg_ts\": 1.520827,\n \"stddev_ts\": 0.000047,\n \"samples_ns\": [ 84163624215, 84167707984, 84162859243 ],\n \"samples_ts\": [ 1.52085, 1.52077, 1.52086 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T08:46:17Z\",\n \"avg_ns\": 407098546397,\n \"stddev_ns\": 23192642,\n \"avg_ts\": 1.257681,\n \"stddev_ts\": 0.000072,\n \"samples_ns\": [ 407112958824, 407110887821, 407071792546 ],\n \"samples_ts\": [ 1.25764, 1.25764, 1.25776 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-13T08:40:40Z", + "avg_ns": 84164730480, + "stddev_ns": 2638895, + "avg_ts": 1.520827, + "stddev_ts": 4.7e-05, + "samples_ns": [ + 84163624215, + 84167707984, + 84162859243 + ], + "samples_ts": [ + 1.52085, + 1.52077, + 1.52086 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-13T08:46:17Z", + "avg_ns": 407098546397, + "stddev_ns": 23192642, + "avg_ts": 1.257681, + "stddev_ts": 7.2e-05, + "samples_ns": [ + 407112958824, + 407110887821, + 407071792546 + ], + "samples_ts": [ + 1.25764, + 1.25764, + 1.25776 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1333 + }, + { + "timestamp_utc": "2025-12-13T09:34:15.113853+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T09:06:41Z\",\n \"avg_ns\": 337632528196,\n \"stddev_ns\": 3705140,\n \"avg_ts\": 1.516442,\n \"stddev_ts\": 0.000016,\n \"samples_ns\": [ 337634285836, 337628372820, 337634925934 ],\n \"samples_ts\": [ 1.51643, 1.51646, 1.51643 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T09:29:11Z\",\n \"avg_ns\": 100866333319,\n \"stddev_ns\": 6075681,\n \"avg_ts\": 1.269006,\n \"stddev_ts\": 0.000076,\n \"samples_ns\": [ 100873295883, 100862177449, 100863526626 ],\n \"samples_ts\": [ 1.26892, 1.26906, 1.26904 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-13T09:06:41Z", + "avg_ns": 337632528196, + "stddev_ns": 3705140, + "avg_ts": 1.516442, + "stddev_ts": 1.6e-05, + "samples_ns": [ + 337634285836, + 337628372820, + 337634925934 + ], + "samples_ts": [ + 1.51643, + 1.51646, + 1.51643 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-13T09:29:11Z", + "avg_ns": 100866333319, + "stddev_ns": 6075681, + "avg_ts": 1.269006, + "stddev_ts": 7.6e-05, + "samples_ns": [ + 100873295883, + 100862177449, + 100863526626 + ], + "samples_ts": [ + 1.26892, + 1.26906, + 1.26904 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1334 + }, + { + "timestamp_utc": "2025-12-13T10:17:08.666611+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T09:34:16Z\",\n \"avg_ns\": 337640533854,\n \"stddev_ns\": 2266029,\n \"avg_ts\": 1.516406,\n \"stddev_ts\": 0.000010,\n \"samples_ns\": [ 337640381599, 337642872171, 337638347792 ],\n \"samples_ts\": [ 1.51641, 1.5164, 1.51642 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T09:56:46Z\",\n \"avg_ns\": 406927074163,\n \"stddev_ns\": 10046751,\n \"avg_ts\": 1.258211,\n \"stddev_ts\": 0.000031,\n \"samples_ns\": [ 406931139949, 406934429153, 406915653388 ],\n \"samples_ts\": [ 1.2582, 1.25819, 1.25825 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-13T09:34:16Z", + "avg_ns": 337640533854, + "stddev_ns": 2266029, + "avg_ts": 1.516406, + "stddev_ts": 1e-05, + "samples_ns": [ + 337640381599, + 337642872171, + 337638347792 + ], + "samples_ts": [ + 1.51641, + 1.5164, + 1.51642 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-13T09:56:46Z", + "avg_ns": 406927074163, + "stddev_ns": 10046751, + "avg_ts": 1.258211, + "stddev_ts": 3.1e-05, + "samples_ns": [ + 406931139949, + 406934429153, + 406915653388 + ], + "samples_ts": [ + 1.2582, + 1.25819, + 1.25825 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1335 + }, + { + "timestamp_utc": "2025-12-13T10:27:49.937355+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T10:17:09Z\",\n \"avg_ns\": 84159763909,\n \"stddev_ns\": 3944109,\n \"avg_ts\": 1.520917,\n \"stddev_ts\": 0.000071,\n \"samples_ns\": [ 84155325111, 84161201979, 84162764639 ],\n \"samples_ts\": [ 1.521, 1.52089, 1.52086 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T10:22:46Z\",\n \"avg_ns\": 100808517923,\n \"stddev_ns\": 3224859,\n \"avg_ts\": 1.269734,\n \"stddev_ts\": 0.000040,\n \"samples_ns\": [ 100812106273, 100805988281, 100807459217 ],\n \"samples_ts\": [ 1.26969, 1.26977, 1.26975 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-13T10:17:09Z", + "avg_ns": 84159763909, + "stddev_ns": 3944109, + "avg_ts": 1.520917, + "stddev_ts": 7.1e-05, + "samples_ns": [ + 84155325111, + 84161201979, + 84162764639 + ], + "samples_ts": [ + 1.521, + 1.52089, + 1.52086 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-13T10:22:46Z", + "avg_ns": 100808517923, + "stddev_ns": 3224859, + "avg_ts": 1.269734, + "stddev_ts": 4e-05, + "samples_ns": [ + 100812106273, + 100805988281, + 100807459217 + ], + "samples_ts": [ + 1.26969, + 1.26977, + 1.26975 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1336 + }, + { + "timestamp_utc": "2025-12-13T10:53:49.854357+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T10:27:51Z\",\n \"avg_ns\": 84172732077,\n \"stddev_ns\": 2635847,\n \"avg_ts\": 1.520682,\n \"stddev_ts\": 0.000048,\n \"samples_ns\": [ 84175449740, 84172560016, 84170186475 ],\n \"samples_ts\": [ 1.52063, 1.52069, 1.52073 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T10:33:27Z\",\n \"avg_ns\": 407013637039,\n \"stddev_ns\": 7472474,\n \"avg_ts\": 1.257943,\n \"stddev_ts\": 0.000023,\n \"samples_ns\": [ 407022217093, 407008556517, 407010137507 ],\n \"samples_ts\": [ 1.25792, 1.25796, 1.25795 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-13T10:27:51Z", + "avg_ns": 84172732077, + "stddev_ns": 2635847, + "avg_ts": 1.520682, + "stddev_ts": 4.8e-05, + "samples_ns": [ + 84175449740, + 84172560016, + 84170186475 + ], + "samples_ts": [ + 1.52063, + 1.52069, + 1.52073 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-13T10:33:27Z", + "avg_ns": 407013637039, + "stddev_ns": 7472474, + "avg_ts": 1.257943, + "stddev_ts": 2.3e-05, + "samples_ns": [ + 407022217093, + 407008556517, + 407010137507 + ], + "samples_ts": [ + 1.25792, + 1.25796, + 1.25795 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1337 + }, + { + "timestamp_utc": "2025-12-13T11:21:27.452910+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T10:53:51Z\",\n \"avg_ns\": 338210927568,\n \"stddev_ns\": 1763631,\n \"avg_ts\": 1.513848,\n \"stddev_ts\": 0.000007,\n \"samples_ns\": [ 338212382892, 338211119417, 338209280397 ],\n \"samples_ts\": [ 1.51384, 1.51385, 1.51386 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T11:16:24Z\",\n \"avg_ns\": 100824108312,\n \"stddev_ns\": 2667611,\n \"avg_ts\": 1.269538,\n \"stddev_ts\": 0.000033,\n \"samples_ns\": [ 100824759090, 100826370914, 100821194933 ],\n \"samples_ts\": [ 1.26953, 1.26951, 1.26957 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-13T10:53:51Z", + "avg_ns": 338210927568, + "stddev_ns": 1763631, + "avg_ts": 1.513848, + "stddev_ts": 7e-06, + "samples_ns": [ + 338212382892, + 338211119417, + 338209280397 + ], + "samples_ts": [ + 1.51384, + 1.51385, + 1.51386 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-13T11:16:24Z", + "avg_ns": 100824108312, + "stddev_ns": 2667611, + "avg_ts": 1.269538, + "stddev_ts": 3.3e-05, + "samples_ns": [ + 100824759090, + 100826370914, + 100821194933 + ], + "samples_ts": [ + 1.26953, + 1.26951, + 1.26957 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1338 + }, + { + "timestamp_utc": "2025-12-13T12:04:24.142759+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T11:21:28Z\",\n \"avg_ns\": 338180579564,\n \"stddev_ns\": 874333,\n \"avg_ts\": 1.513984,\n \"stddev_ts\": 0.000003,\n \"samples_ns\": [ 338180693862, 338181167768, 338179877063 ],\n \"samples_ts\": [ 1.51398, 1.51398, 1.51399 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T11:44:01Z\",\n \"avg_ns\": 407236685756,\n \"stddev_ns\": 7052806,\n \"avg_ts\": 1.257254,\n \"stddev_ts\": 0.000022,\n \"samples_ns\": [ 407244407845, 407230774197, 407234875228 ],\n \"samples_ts\": [ 1.25723, 1.25727, 1.25726 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-13T11:21:28Z", + "avg_ns": 338180579564, + "stddev_ns": 874333, + "avg_ts": 1.513984, + "stddev_ts": 3e-06, + "samples_ns": [ + 338180693862, + 338181167768, + 338179877063 + ], + "samples_ts": [ + 1.51398, + 1.51398, + 1.51399 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-13T11:44:01Z", + "avg_ns": 407236685756, + "stddev_ns": 7052806, + "avg_ts": 1.257254, + "stddev_ts": 2.2e-05, + "samples_ns": [ + 407244407845, + 407230774197, + 407234875228 + ], + "samples_ts": [ + 1.25723, + 1.25727, + 1.25726 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1339 + }, + { + "timestamp_utc": "2025-12-13T12:15:05.600843+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T12:04:25Z\",\n \"avg_ns\": 84160897546,\n \"stddev_ns\": 776595,\n \"avg_ts\": 1.520896,\n \"stddev_ts\": 0.000012,\n \"samples_ns\": [ 84161658753, 84160502718, 84160531169 ],\n \"samples_ts\": [ 1.52088, 1.5209, 1.5209 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T12:10:02Z\",\n \"avg_ns\": 100845989123,\n \"stddev_ns\": 4571857,\n \"avg_ts\": 1.269262,\n \"stddev_ts\": 0.000057,\n \"samples_ns\": [ 100850688223, 100845674036, 100841605112 ],\n \"samples_ts\": [ 1.2692, 1.26927, 1.26932 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-13T12:04:25Z", + "avg_ns": 84160897546, + "stddev_ns": 776595, + "avg_ts": 1.520896, + "stddev_ts": 1.2e-05, + "samples_ns": [ + 84161658753, + 84160502718, + 84160531169 + ], + "samples_ts": [ + 1.52088, + 1.5209, + 1.5209 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-13T12:10:02Z", + "avg_ns": 100845989123, + "stddev_ns": 4571857, + "avg_ts": 1.269262, + "stddev_ts": 5.7e-05, + "samples_ns": [ + 100850688223, + 100845674036, + 100841605112 + ], + "samples_ts": [ + 1.2692, + 1.26927, + 1.26932 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1340 + }, + { + "timestamp_utc": "2025-12-13T12:41:09.377696+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T12:15:06Z\",\n \"avg_ns\": 85152069416,\n \"stddev_ns\": 3178624,\n \"avg_ts\": 1.503193,\n \"stddev_ts\": 0.000056,\n \"samples_ns\": [ 85148790966, 85152340369, 85155076915 ],\n \"samples_ts\": [ 1.50325, 1.50319, 1.50314 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T12:20:47Z\",\n \"avg_ns\": 406976576429,\n \"stddev_ns\": 34770400,\n \"avg_ts\": 1.258058,\n \"stddev_ts\": 0.000107,\n \"samples_ns\": [ 407015908309, 406963891403, 406949929575 ],\n \"samples_ts\": [ 1.25794, 1.2581, 1.25814 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-13T12:15:06Z", + "avg_ns": 85152069416, + "stddev_ns": 3178624, + "avg_ts": 1.503193, + "stddev_ts": 5.6e-05, + "samples_ns": [ + 85148790966, + 85152340369, + 85155076915 + ], + "samples_ts": [ + 1.50325, + 1.50319, + 1.50314 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-13T12:20:47Z", + "avg_ns": 406976576429, + "stddev_ns": 34770400, + "avg_ts": 1.258058, + "stddev_ts": 0.000107, + "samples_ns": [ + 407015908309, + 406963891403, + 406949929575 + ], + "samples_ts": [ + 1.25794, + 1.2581, + 1.25814 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1341 + }, + { + "timestamp_utc": "2025-12-13T13:08:52.741846+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T12:41:10Z\",\n \"avg_ns\": 339686539245,\n \"stddev_ns\": 2493131,\n \"avg_ts\": 1.507272,\n \"stddev_ts\": 0.000011,\n \"samples_ns\": [ 339689328914, 339684946095, 339685342727 ],\n \"samples_ts\": [ 1.50726, 1.50728, 1.50728 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T13:03:49Z\",\n \"avg_ns\": 100778345256,\n \"stddev_ns\": 10287536,\n \"avg_ts\": 1.270114,\n \"stddev_ts\": 0.000130,\n \"samples_ns\": [ 100790204149, 100771908245, 100772923375 ],\n \"samples_ts\": [ 1.26996, 1.2702, 1.27018 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-13T12:41:10Z", + "avg_ns": 339686539245, + "stddev_ns": 2493131, + "avg_ts": 1.507272, + "stddev_ts": 1.1e-05, + "samples_ns": [ + 339689328914, + 339684946095, + 339685342727 + ], + "samples_ts": [ + 1.50726, + 1.50728, + 1.50728 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-13T13:03:49Z", + "avg_ns": 100778345256, + "stddev_ns": 10287536, + "avg_ts": 1.270114, + "stddev_ts": 0.00013, + "samples_ns": [ + 100790204149, + 100771908245, + 100772923375 + ], + "samples_ts": [ + 1.26996, + 1.2702, + 1.27018 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1342 + }, + { + "timestamp_utc": "2025-12-13T13:51:55.024932+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T13:08:54Z\",\n \"avg_ns\": 339702175957,\n \"stddev_ns\": 901582,\n \"avg_ts\": 1.507203,\n \"stddev_ts\": 0.000002,\n \"samples_ns\": [ 339701768573, 339702474384, 339702284916 ],\n \"samples_ts\": [ 1.5072, 1.5072, 1.5072 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T13:31:33Z\",\n \"avg_ns\": 406891215792,\n \"stddev_ns\": 10298107,\n \"avg_ts\": 1.258322,\n \"stddev_ts\": 0.000032,\n \"samples_ns\": [ 406903056007, 406884988730, 406885602641 ],\n \"samples_ts\": [ 1.25828, 1.25834, 1.25834 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-13T13:08:54Z", + "avg_ns": 339702175957, + "stddev_ns": 901582, + "avg_ts": 1.507203, + "stddev_ts": 2e-06, + "samples_ns": [ + 339701768573, + 339702474384, + 339702284916 + ], + "samples_ts": [ + 1.5072, + 1.5072, + 1.5072 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-13T13:31:33Z", + "avg_ns": 406891215792, + "stddev_ns": 10298107, + "avg_ts": 1.258322, + "stddev_ts": 3.2e-05, + "samples_ns": [ + 406903056007, + 406884988730, + 406885602641 + ], + "samples_ts": [ + 1.25828, + 1.25834, + 1.25834 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1343 + }, + { + "timestamp_utc": "2025-12-13T14:02:36.415547+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T13:51:56Z\",\n \"avg_ns\": 84168098897,\n \"stddev_ns\": 610052,\n \"avg_ts\": 1.520766,\n \"stddev_ts\": 0.000010,\n \"samples_ns\": [ 84167850910, 84168714697, 84167731085 ],\n \"samples_ts\": [ 1.52077, 1.52076, 1.52077 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T13:57:32Z\",\n \"avg_ns\": 100839798784,\n \"stddev_ns\": 3168570,\n \"avg_ts\": 1.269340,\n \"stddev_ts\": 0.000040,\n \"samples_ns\": [ 100843184518, 100836947642, 100839264193 ],\n \"samples_ts\": [ 1.2693, 1.26938, 1.26935 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-13T13:51:56Z", + "avg_ns": 84168098897, + "stddev_ns": 610052, + "avg_ts": 1.520766, + "stddev_ts": 1e-05, + "samples_ns": [ + 84167850910, + 84168714697, + 84167731085 + ], + "samples_ts": [ + 1.52077, + 1.52076, + 1.52077 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-13T13:57:32Z", + "avg_ns": 100839798784, + "stddev_ns": 3168570, + "avg_ts": 1.26934, + "stddev_ts": 4e-05, + "samples_ns": [ + 100843184518, + 100836947642, + 100839264193 + ], + "samples_ts": [ + 1.2693, + 1.26938, + 1.26935 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1344 + }, + { + "timestamp_utc": "2025-12-13T14:28:36.468645+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T14:02:37Z\",\n \"avg_ns\": 84158594310,\n \"stddev_ns\": 4254989,\n \"avg_ts\": 1.520938,\n \"stddev_ts\": 0.000077,\n \"samples_ns\": [ 84157230095, 84155188738, 84163364097 ],\n \"samples_ts\": [ 1.52096, 1.521, 1.52085 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T14:08:14Z\",\n \"avg_ns\": 407032008919,\n \"stddev_ns\": 7869458,\n \"avg_ts\": 1.257886,\n \"stddev_ts\": 0.000024,\n \"samples_ns\": [ 407040879110, 407029024379, 407026123270 ],\n \"samples_ts\": [ 1.25786, 1.2579, 1.2579 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-13T14:02:37Z", + "avg_ns": 84158594310, + "stddev_ns": 4254989, + "avg_ts": 1.520938, + "stddev_ts": 7.7e-05, + "samples_ns": [ + 84157230095, + 84155188738, + 84163364097 + ], + "samples_ts": [ + 1.52096, + 1.521, + 1.52085 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-13T14:08:14Z", + "avg_ns": 407032008919, + "stddev_ns": 7869458, + "avg_ts": 1.257886, + "stddev_ts": 2.4e-05, + "samples_ns": [ + 407040879110, + 407029024379, + 407026123270 + ], + "samples_ts": [ + 1.25786, + 1.2579, + 1.2579 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1345 + }, + { + "timestamp_utc": "2025-12-13T14:56:11.952753+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T14:28:37Z\",\n \"avg_ns\": 337657983815,\n \"stddev_ns\": 3412615,\n \"avg_ts\": 1.516327,\n \"stddev_ts\": 0.000015,\n \"samples_ns\": [ 337660449287, 337659413227, 337654088931 ],\n \"samples_ts\": [ 1.51632, 1.51632, 1.51634 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T14:51:08Z\",\n \"avg_ns\": 100834981673,\n \"stddev_ns\": 1880542,\n \"avg_ts\": 1.269401,\n \"stddev_ts\": 0.000024,\n \"samples_ns\": [ 100836919170, 100834862053, 100833163796 ],\n \"samples_ts\": [ 1.26938, 1.2694, 1.26942 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-13T14:28:37Z", + "avg_ns": 337657983815, + "stddev_ns": 3412615, + "avg_ts": 1.516327, + "stddev_ts": 1.5e-05, + "samples_ns": [ + 337660449287, + 337659413227, + 337654088931 + ], + "samples_ts": [ + 1.51632, + 1.51632, + 1.51634 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-13T14:51:08Z", + "avg_ns": 100834981673, + "stddev_ns": 1880542, + "avg_ts": 1.269401, + "stddev_ts": 2.4e-05, + "samples_ns": [ + 100836919170, + 100834862053, + 100833163796 + ], + "samples_ts": [ + 1.26938, + 1.2694, + 1.26942 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1346 + }, + { + "timestamp_utc": "2025-12-13T15:39:06.096320+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T14:56:13Z\",\n \"avg_ns\": 337654363281,\n \"stddev_ns\": 5614581,\n \"avg_ts\": 1.516344,\n \"stddev_ts\": 0.000025,\n \"samples_ns\": [ 337659633336, 337648563114, 337654893395 ],\n \"samples_ts\": [ 1.51632, 1.51637, 1.51634 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T15:18:43Z\",\n \"avg_ns\": 407104604468,\n \"stddev_ns\": 9580238,\n \"avg_ts\": 1.257662,\n \"stddev_ts\": 0.000029,\n \"samples_ns\": [ 407113898195, 407094840307, 407105074904 ],\n \"samples_ts\": [ 1.25763, 1.25769, 1.25766 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-13T14:56:13Z", + "avg_ns": 337654363281, + "stddev_ns": 5614581, + "avg_ts": 1.516344, + "stddev_ts": 2.5e-05, + "samples_ns": [ + 337659633336, + 337648563114, + 337654893395 + ], + "samples_ts": [ + 1.51632, + 1.51637, + 1.51634 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-13T15:18:43Z", + "avg_ns": 407104604468, + "stddev_ns": 9580238, + "avg_ts": 1.257662, + "stddev_ts": 2.9e-05, + "samples_ns": [ + 407113898195, + 407094840307, + 407105074904 + ], + "samples_ts": [ + 1.25763, + 1.25769, + 1.25766 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1347 + }, + { + "timestamp_utc": "2025-12-13T15:49:47.316219+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T15:39:07Z\",\n \"avg_ns\": 84175416635,\n \"stddev_ns\": 1315037,\n \"avg_ts\": 1.520634,\n \"stddev_ts\": 0.000023,\n \"samples_ns\": [ 84174267034, 84176800114, 84175182758 ],\n \"samples_ts\": [ 1.52065, 1.52061, 1.52064 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T15:44:44Z\",\n \"avg_ns\": 100782168244,\n \"stddev_ns\": 5796444,\n \"avg_ts\": 1.270066,\n \"stddev_ts\": 0.000073,\n \"samples_ns\": [ 100788607047, 100777398247, 100780499439 ],\n \"samples_ts\": [ 1.26998, 1.27013, 1.27009 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-13T15:39:07Z", + "avg_ns": 84175416635, + "stddev_ns": 1315037, + "avg_ts": 1.520634, + "stddev_ts": 2.3e-05, + "samples_ns": [ + 84174267034, + 84176800114, + 84175182758 + ], + "samples_ts": [ + 1.52065, + 1.52061, + 1.52064 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-13T15:44:44Z", + "avg_ns": 100782168244, + "stddev_ns": 5796444, + "avg_ts": 1.270066, + "stddev_ts": 7.3e-05, + "samples_ns": [ + 100788607047, + 100777398247, + 100780499439 + ], + "samples_ts": [ + 1.26998, + 1.27013, + 1.27009 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1348 + }, + { + "timestamp_utc": "2025-12-13T16:15:46.875126+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T15:49:48Z\",\n \"avg_ns\": 84146820708,\n \"stddev_ns\": 684263,\n \"avg_ts\": 1.521151,\n \"stddev_ts\": 0.000011,\n \"samples_ns\": [ 84147229896, 84147124537, 84146107692 ],\n \"samples_ts\": [ 1.52114, 1.52115, 1.52116 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T15:55:25Z\",\n \"avg_ns\": 406926932855,\n \"stddev_ns\": 7772097,\n \"avg_ts\": 1.258211,\n \"stddev_ts\": 0.000024,\n \"samples_ns\": [ 406933897890, 406918632884, 406928267793 ],\n \"samples_ts\": [ 1.25819, 1.25824, 1.25821 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-13T15:49:48Z", + "avg_ns": 84146820708, + "stddev_ns": 684263, + "avg_ts": 1.521151, + "stddev_ts": 1.1e-05, + "samples_ns": [ + 84147229896, + 84147124537, + 84146107692 + ], + "samples_ts": [ + 1.52114, + 1.52115, + 1.52116 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-13T15:55:25Z", + "avg_ns": 406926932855, + "stddev_ns": 7772097, + "avg_ts": 1.258211, + "stddev_ts": 2.4e-05, + "samples_ns": [ + 406933897890, + 406918632884, + 406928267793 + ], + "samples_ts": [ + 1.25819, + 1.25824, + 1.25821 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1349 + }, + { + "timestamp_utc": "2025-12-13T16:43:24.139111+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T16:15:48Z\",\n \"avg_ns\": 338133480258,\n \"stddev_ns\": 2418629,\n \"avg_ts\": 1.514195,\n \"stddev_ts\": 0.000010,\n \"samples_ns\": [ 338132343155, 338131998506, 338136099115 ],\n \"samples_ts\": [ 1.5142, 1.5142, 1.51418 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T16:38:20Z\",\n \"avg_ns\": 100826367401,\n \"stddev_ns\": 3386571,\n \"avg_ts\": 1.269509,\n \"stddev_ts\": 0.000042,\n \"samples_ns\": [ 100830078296, 100823542850, 100825481059 ],\n \"samples_ts\": [ 1.26946, 1.26954, 1.26952 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-13T16:15:48Z", + "avg_ns": 338133480258, + "stddev_ns": 2418629, + "avg_ts": 1.514195, + "stddev_ts": 1e-05, + "samples_ns": [ + 338132343155, + 338131998506, + 338136099115 + ], + "samples_ts": [ + 1.5142, + 1.5142, + 1.51418 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-13T16:38:20Z", + "avg_ns": 100826367401, + "stddev_ns": 3386571, + "avg_ts": 1.269509, + "stddev_ts": 4.2e-05, + "samples_ns": [ + 100830078296, + 100823542850, + 100825481059 + ], + "samples_ts": [ + 1.26946, + 1.26954, + 1.26952 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1350 + }, + { + "timestamp_utc": "2025-12-13T17:26:19.416722+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T16:43:25Z\",\n \"avg_ns\": 338075989060,\n \"stddev_ns\": 3420618,\n \"avg_ts\": 1.514452,\n \"stddev_ts\": 0.000015,\n \"samples_ns\": [ 338073232242, 338074987864, 338079747075 ],\n \"samples_ts\": [ 1.51446, 1.51446, 1.51444 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T17:05:57Z\",\n \"avg_ns\": 406903619401,\n \"stddev_ns\": 8967106,\n \"avg_ts\": 1.258283,\n \"stddev_ts\": 0.000028,\n \"samples_ns\": [ 406913897704, 406897603054, 406899357446 ],\n \"samples_ts\": [ 1.25825, 1.2583, 1.2583 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-13T16:43:25Z", + "avg_ns": 338075989060, + "stddev_ns": 3420618, + "avg_ts": 1.514452, + "stddev_ts": 1.5e-05, + "samples_ns": [ + 338073232242, + 338074987864, + 338079747075 + ], + "samples_ts": [ + 1.51446, + 1.51446, + 1.51444 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-13T17:05:57Z", + "avg_ns": 406903619401, + "stddev_ns": 8967106, + "avg_ts": 1.258283, + "stddev_ts": 2.8e-05, + "samples_ns": [ + 406913897704, + 406897603054, + 406899357446 + ], + "samples_ts": [ + 1.25825, + 1.2583, + 1.2583 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1351 + }, + { + "timestamp_utc": "2025-12-13T17:37:08.215369+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T17:26:20Z\",\n \"avg_ns\": 85161135650,\n \"stddev_ns\": 2789803,\n \"avg_ts\": 1.503033,\n \"stddev_ts\": 0.000049,\n \"samples_ns\": [ 85158766996, 85164165185, 85160474771 ],\n \"samples_ts\": [ 1.50307, 1.50298, 1.50304 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T17:32:01Z\",\n \"avg_ns\": 101987582030,\n \"stddev_ns\": 612514991,\n \"avg_ts\": 1.255085,\n \"stddev_ts\": 0.007564,\n \"samples_ns\": [ 102344097336, 102338330086, 101280318670 ],\n \"samples_ts\": [ 1.25068, 1.25075, 1.26382 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-13T17:26:20Z", + "avg_ns": 85161135650, + "stddev_ns": 2789803, + "avg_ts": 1.503033, + "stddev_ts": 4.9e-05, + "samples_ns": [ + 85158766996, + 85164165185, + 85160474771 + ], + "samples_ts": [ + 1.50307, + 1.50298, + 1.50304 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-13T17:32:01Z", + "avg_ns": 101987582030, + "stddev_ns": 612514991, + "avg_ts": 1.255085, + "stddev_ts": 0.007564, + "samples_ns": [ + 102344097336, + 102338330086, + 101280318670 + ], + "samples_ts": [ + 1.25068, + 1.25075, + 1.26382 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1352 + }, + { + "timestamp_utc": "2025-12-13T18:03:09.897047+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T17:37:09Z\",\n \"avg_ns\": 84147362683,\n \"stddev_ns\": 2418683,\n \"avg_ts\": 1.521141,\n \"stddev_ts\": 0.000043,\n \"samples_ns\": [ 84147971067, 84149383172, 84144733812 ],\n \"samples_ts\": [ 1.52113, 1.5211, 1.52119 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T17:42:46Z\",\n \"avg_ns\": 407627343815,\n \"stddev_ns\": 626509912,\n \"avg_ts\": 1.256051,\n \"stddev_ts\": 0.001929,\n \"samples_ns\": [ 408350751114, 407260613660, 407270666673 ],\n \"samples_ts\": [ 1.25382, 1.25718, 1.25715 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-13T17:37:09Z", + "avg_ns": 84147362683, + "stddev_ns": 2418683, + "avg_ts": 1.521141, + "stddev_ts": 4.3e-05, + "samples_ns": [ + 84147971067, + 84149383172, + 84144733812 + ], + "samples_ts": [ + 1.52113, + 1.5211, + 1.52119 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-13T17:42:46Z", + "avg_ns": 407627343815, + "stddev_ns": 626509912, + "avg_ts": 1.256051, + "stddev_ts": 0.001929, + "samples_ns": [ + 408350751114, + 407260613660, + 407270666673 + ], + "samples_ts": [ + 1.25382, + 1.25718, + 1.25715 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1353 + }, + { + "timestamp_utc": "2025-12-13T18:30:53.369852+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T18:03:11Z\",\n \"avg_ns\": 339664920575,\n \"stddev_ns\": 7934378,\n \"avg_ts\": 1.507368,\n \"stddev_ts\": 0.000035,\n \"samples_ns\": [ 339671242904, 339667472188, 339656046634 ],\n \"samples_ts\": [ 1.50734, 1.50736, 1.50741 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T18:25:49Z\",\n \"avg_ns\": 100869005543,\n \"stddev_ns\": 8443774,\n \"avg_ts\": 1.268973,\n \"stddev_ts\": 0.000106,\n \"samples_ns\": [ 100878755516, 100864101363, 100864159750 ],\n \"samples_ts\": [ 1.26885, 1.26903, 1.26903 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-13T18:03:11Z", + "avg_ns": 339664920575, + "stddev_ns": 7934378, + "avg_ts": 1.507368, + "stddev_ts": 3.5e-05, + "samples_ns": [ + 339671242904, + 339667472188, + 339656046634 + ], + "samples_ts": [ + 1.50734, + 1.50736, + 1.50741 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-13T18:25:49Z", + "avg_ns": 100869005543, + "stddev_ns": 8443774, + "avg_ts": 1.268973, + "stddev_ts": 0.000106, + "samples_ns": [ + 100878755516, + 100864101363, + 100864159750 + ], + "samples_ts": [ + 1.26885, + 1.26903, + 1.26903 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1354 + }, + { + "timestamp_utc": "2025-12-13T19:13:55.500270+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T18:30:54Z\",\n \"avg_ns\": 339639305530,\n \"stddev_ns\": 11330764,\n \"avg_ts\": 1.507482,\n \"stddev_ts\": 0.000050,\n \"samples_ns\": [ 339631900235, 339633685207, 339652331149 ],\n \"samples_ts\": [ 1.50751, 1.50751, 1.50742 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T18:53:33Z\",\n \"avg_ns\": 407116889685,\n \"stddev_ns\": 39456514,\n \"avg_ts\": 1.257624,\n \"stddev_ts\": 0.000122,\n \"samples_ns\": [ 407162450113, 407094155285, 407094063657 ],\n \"samples_ts\": [ 1.25748, 1.25769, 1.25769 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-13T18:30:54Z", + "avg_ns": 339639305530, + "stddev_ns": 11330764, + "avg_ts": 1.507482, + "stddev_ts": 5e-05, + "samples_ns": [ + 339631900235, + 339633685207, + 339652331149 + ], + "samples_ts": [ + 1.50751, + 1.50751, + 1.50742 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-13T18:53:33Z", + "avg_ns": 407116889685, + "stddev_ns": 39456514, + "avg_ts": 1.257624, + "stddev_ts": 0.000122, + "samples_ns": [ + 407162450113, + 407094155285, + 407094063657 + ], + "samples_ts": [ + 1.25748, + 1.25769, + 1.25769 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1355 + }, + { + "timestamp_utc": "2025-12-13T19:24:36.970443+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T19:13:56Z\",\n \"avg_ns\": 84153620104,\n \"stddev_ns\": 3967658,\n \"avg_ts\": 1.521028,\n \"stddev_ts\": 0.000072,\n \"samples_ns\": [ 84150025842, 84152956894, 84157877576 ],\n \"samples_ts\": [ 1.52109, 1.52104, 1.52095 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T19:19:33Z\",\n \"avg_ns\": 100896800734,\n \"stddev_ns\": 3819242,\n \"avg_ts\": 1.268623,\n \"stddev_ts\": 0.000048,\n \"samples_ns\": [ 100901185755, 100894862023, 100894354425 ],\n \"samples_ts\": [ 1.26857, 1.26865, 1.26865 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-13T19:13:56Z", + "avg_ns": 84153620104, + "stddev_ns": 3967658, + "avg_ts": 1.521028, + "stddev_ts": 7.2e-05, + "samples_ns": [ + 84150025842, + 84152956894, + 84157877576 + ], + "samples_ts": [ + 1.52109, + 1.52104, + 1.52095 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-13T19:19:33Z", + "avg_ns": 100896800734, + "stddev_ns": 3819242, + "avg_ts": 1.268623, + "stddev_ts": 4.8e-05, + "samples_ns": [ + 100901185755, + 100894862023, + 100894354425 + ], + "samples_ts": [ + 1.26857, + 1.26865, + 1.26865 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1356 + }, + { + "timestamp_utc": "2025-12-13T19:50:37.235423+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T19:24:38Z\",\n \"avg_ns\": 84149865652,\n \"stddev_ns\": 1975101,\n \"avg_ts\": 1.521096,\n \"stddev_ts\": 0.000035,\n \"samples_ns\": [ 84148651823, 84148825800, 84152119334 ],\n \"samples_ts\": [ 1.52112, 1.52111, 1.52105 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T19:30:14Z\",\n \"avg_ns\": 407156990071,\n \"stddev_ns\": 8019103,\n \"avg_ts\": 1.257500,\n \"stddev_ts\": 0.000025,\n \"samples_ns\": [ 407166206822, 407153152831, 407151610560 ],\n \"samples_ts\": [ 1.25747, 1.25751, 1.25752 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-13T19:24:38Z", + "avg_ns": 84149865652, + "stddev_ns": 1975101, + "avg_ts": 1.521096, + "stddev_ts": 3.5e-05, + "samples_ns": [ + 84148651823, + 84148825800, + 84152119334 + ], + "samples_ts": [ + 1.52112, + 1.52111, + 1.52105 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-13T19:30:14Z", + "avg_ns": 407156990071, + "stddev_ns": 8019103, + "avg_ts": 1.2575, + "stddev_ts": 2.5e-05, + "samples_ns": [ + 407166206822, + 407153152831, + 407151610560 + ], + "samples_ts": [ + 1.25747, + 1.25751, + 1.25752 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1357 + }, + { + "timestamp_utc": "2025-12-13T20:18:12.157140+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T19:50:38Z\",\n \"avg_ns\": 337592135984,\n \"stddev_ns\": 4007269,\n \"avg_ts\": 1.516623,\n \"stddev_ts\": 0.000018,\n \"samples_ns\": [ 337596690958, 337590258216, 337589458779 ],\n \"samples_ts\": [ 1.5166, 1.51663, 1.51664 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T20:13:08Z\",\n \"avg_ns\": 100793047153,\n \"stddev_ns\": 3951802,\n \"avg_ts\": 1.269929,\n \"stddev_ts\": 0.000050,\n \"samples_ns\": [ 100797579520, 100790450532, 100791111408 ],\n \"samples_ts\": [ 1.26987, 1.26996, 1.26995 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-13T19:50:38Z", + "avg_ns": 337592135984, + "stddev_ns": 4007269, + "avg_ts": 1.516623, + "stddev_ts": 1.8e-05, + "samples_ns": [ + 337596690958, + 337590258216, + 337589458779 + ], + "samples_ts": [ + 1.5166, + 1.51663, + 1.51664 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-13T20:13:08Z", + "avg_ns": 100793047153, + "stddev_ns": 3951802, + "avg_ts": 1.269929, + "stddev_ts": 5e-05, + "samples_ns": [ + 100797579520, + 100790450532, + 100791111408 + ], + "samples_ts": [ + 1.26987, + 1.26996, + 1.26995 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1358 + }, + { + "timestamp_utc": "2025-12-13T21:01:06.394341+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T20:18:13Z\",\n \"avg_ns\": 337624166061,\n \"stddev_ns\": 2148426,\n \"avg_ts\": 1.516479,\n \"stddev_ts\": 0.000009,\n \"samples_ns\": [ 337624088717, 337622137459, 337626272008 ],\n \"samples_ts\": [ 1.51648, 1.51649, 1.51647 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T20:40:44Z\",\n \"avg_ns\": 407158331124,\n \"stddev_ns\": 3059459,\n \"avg_ts\": 1.257496,\n \"stddev_ts\": 0.000009,\n \"samples_ns\": [ 407160023000, 407154799398, 407160170974 ],\n \"samples_ts\": [ 1.25749, 1.25751, 1.25749 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-13T20:18:13Z", + "avg_ns": 337624166061, + "stddev_ns": 2148426, + "avg_ts": 1.516479, + "stddev_ts": 9e-06, + "samples_ns": [ + 337624088717, + 337622137459, + 337626272008 + ], + "samples_ts": [ + 1.51648, + 1.51649, + 1.51647 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-13T20:40:44Z", + "avg_ns": 407158331124, + "stddev_ns": 3059459, + "avg_ts": 1.257496, + "stddev_ts": 9e-06, + "samples_ns": [ + 407160023000, + 407154799398, + 407160170974 + ], + "samples_ts": [ + 1.25749, + 1.25751, + 1.25749 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1359 + }, + { + "timestamp_utc": "2025-12-13T21:11:47.708554+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T21:01:07Z\",\n \"avg_ns\": 84173036578,\n \"stddev_ns\": 1261284,\n \"avg_ts\": 1.520677,\n \"stddev_ts\": 0.000022,\n \"samples_ns\": [ 84171623148, 84173834542, 84173652045 ],\n \"samples_ts\": [ 1.5207, 1.52066, 1.52067 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T21:06:44Z\",\n \"avg_ns\": 100816072203,\n \"stddev_ns\": 3598302,\n \"avg_ts\": 1.269639,\n \"stddev_ts\": 0.000045,\n \"samples_ns\": [ 100820223984, 100814137096, 100813855529 ],\n \"samples_ts\": [ 1.26959, 1.26966, 1.26967 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-13T21:01:07Z", + "avg_ns": 84173036578, + "stddev_ns": 1261284, + "avg_ts": 1.520677, + "stddev_ts": 2.2e-05, + "samples_ns": [ + 84171623148, + 84173834542, + 84173652045 + ], + "samples_ts": [ + 1.5207, + 1.52066, + 1.52067 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-13T21:06:44Z", + "avg_ns": 100816072203, + "stddev_ns": 3598302, + "avg_ts": 1.269639, + "stddev_ts": 4.5e-05, + "samples_ns": [ + 100820223984, + 100814137096, + 100813855529 + ], + "samples_ts": [ + 1.26959, + 1.26966, + 1.26967 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1360 + }, + { + "timestamp_utc": "2025-12-13T21:37:47.068231+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T21:11:48Z\",\n \"avg_ns\": 84163517531,\n \"stddev_ns\": 845016,\n \"avg_ts\": 1.520849,\n \"stddev_ts\": 0.000013,\n \"samples_ns\": [ 84162692545, 84164117836, 84163742214 ],\n \"samples_ts\": [ 1.52086, 1.52084, 1.52084 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T21:17:25Z\",\n \"avg_ns\": 406838663369,\n \"stddev_ns\": 21512168,\n \"avg_ts\": 1.258484,\n \"stddev_ts\": 0.000067,\n \"samples_ns\": [ 406861883983, 406834693462, 406819412662 ],\n \"samples_ts\": [ 1.25841, 1.2585, 1.25854 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-13T21:11:48Z", + "avg_ns": 84163517531, + "stddev_ns": 845016, + "avg_ts": 1.520849, + "stddev_ts": 1.3e-05, + "samples_ns": [ + 84162692545, + 84164117836, + 84163742214 + ], + "samples_ts": [ + 1.52086, + 1.52084, + 1.52084 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-13T21:17:25Z", + "avg_ns": 406838663369, + "stddev_ns": 21512168, + "avg_ts": 1.258484, + "stddev_ts": 6.7e-05, + "samples_ns": [ + 406861883983, + 406834693462, + 406819412662 + ], + "samples_ts": [ + 1.25841, + 1.2585, + 1.25854 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1361 + }, + { + "timestamp_utc": "2025-12-13T22:05:24.237204+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T21:37:48Z\",\n \"avg_ns\": 338117584492,\n \"stddev_ns\": 8943417,\n \"avg_ts\": 1.514266,\n \"stddev_ts\": 0.000040,\n \"samples_ns\": [ 338110442289, 338114695994, 338127615193 ],\n \"samples_ts\": [ 1.5143, 1.51428, 1.51422 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T22:00:20Z\",\n \"avg_ns\": 100812724850,\n \"stddev_ns\": 5935648,\n \"avg_ts\": 1.269681,\n \"stddev_ts\": 0.000075,\n \"samples_ns\": [ 100819145825, 100811566051, 100807462675 ],\n \"samples_ts\": [ 1.2696, 1.2697, 1.26975 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-13T21:37:48Z", + "avg_ns": 338117584492, + "stddev_ns": 8943417, + "avg_ts": 1.514266, + "stddev_ts": 4e-05, + "samples_ns": [ + 338110442289, + 338114695994, + 338127615193 + ], + "samples_ts": [ + 1.5143, + 1.51428, + 1.51422 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-13T22:00:20Z", + "avg_ns": 100812724850, + "stddev_ns": 5935648, + "avg_ts": 1.269681, + "stddev_ts": 7.5e-05, + "samples_ns": [ + 100819145825, + 100811566051, + 100807462675 + ], + "samples_ts": [ + 1.2696, + 1.2697, + 1.26975 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1362 + }, + { + "timestamp_utc": "2025-12-13T22:48:19.701535+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T22:05:25Z\",\n \"avg_ns\": 338147017560,\n \"stddev_ns\": 5939193,\n \"avg_ts\": 1.514134,\n \"stddev_ts\": 0.000026,\n \"samples_ns\": [ 338145067740, 338153656791, 338142328150 ],\n \"samples_ts\": [ 1.51414, 1.5141, 1.51416 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T22:27:58Z\",\n \"avg_ns\": 406886327425,\n \"stddev_ns\": 6298726,\n \"avg_ts\": 1.258337,\n \"stddev_ts\": 0.000019,\n \"samples_ns\": [ 406893554172, 406883026578, 406882401526 ],\n \"samples_ts\": [ 1.25831, 1.25835, 1.25835 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-13T22:05:25Z", + "avg_ns": 338147017560, + "stddev_ns": 5939193, + "avg_ts": 1.514134, + "stddev_ts": 2.6e-05, + "samples_ns": [ + 338145067740, + 338153656791, + 338142328150 + ], + "samples_ts": [ + 1.51414, + 1.5141, + 1.51416 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-13T22:27:58Z", + "avg_ns": 406886327425, + "stddev_ns": 6298726, + "avg_ts": 1.258337, + "stddev_ts": 1.9e-05, + "samples_ns": [ + 406893554172, + 406883026578, + 406882401526 + ], + "samples_ts": [ + 1.25831, + 1.25835, + 1.25835 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1363 + }, + { + "timestamp_utc": "2025-12-13T22:59:01.135485+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T22:48:20Z\",\n \"avg_ns\": 84159117431,\n \"stddev_ns\": 1175921,\n \"avg_ts\": 1.520928,\n \"stddev_ts\": 0.000021,\n \"samples_ns\": [ 84159774828, 84157801563, 84159775903 ],\n \"samples_ts\": [ 1.52092, 1.52095, 1.52092 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T22:53:57Z\",\n \"avg_ns\": 100863536385,\n \"stddev_ns\": 837684,\n \"avg_ts\": 1.269041,\n \"stddev_ts\": 0.000011,\n \"samples_ns\": [ 100864263654, 100862620459, 100863725042 ],\n \"samples_ts\": [ 1.26903, 1.26905, 1.26904 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-13T22:48:20Z", + "avg_ns": 84159117431, + "stddev_ns": 1175921, + "avg_ts": 1.520928, + "stddev_ts": 2.1e-05, + "samples_ns": [ + 84159774828, + 84157801563, + 84159775903 + ], + "samples_ts": [ + 1.52092, + 1.52095, + 1.52092 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-13T22:53:57Z", + "avg_ns": 100863536385, + "stddev_ns": 837684, + "avg_ts": 1.269041, + "stddev_ts": 1.1e-05, + "samples_ns": [ + 100864263654, + 100862620459, + 100863725042 + ], + "samples_ts": [ + 1.26903, + 1.26905, + 1.26904 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1364 + }, + { + "timestamp_utc": "2025-12-13T23:25:01.680222+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T22:59:02Z\",\n \"avg_ns\": 84169638320,\n \"stddev_ns\": 1806294,\n \"avg_ts\": 1.520738,\n \"stddev_ts\": 0.000032,\n \"samples_ns\": [ 84171653962, 84168267977, 84168993022 ],\n \"samples_ts\": [ 1.5207, 1.52076, 1.52075 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T23:04:39Z\",\n \"avg_ns\": 407186742611,\n \"stddev_ns\": 3784594,\n \"avg_ts\": 1.257408,\n \"stddev_ts\": 0.000012,\n \"samples_ns\": [ 407190824894, 407183511011, 407185891929 ],\n \"samples_ts\": [ 1.2574, 1.25742, 1.25741 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-13T22:59:02Z", + "avg_ns": 84169638320, + "stddev_ns": 1806294, + "avg_ts": 1.520738, + "stddev_ts": 3.2e-05, + "samples_ns": [ + 84171653962, + 84168267977, + 84168993022 + ], + "samples_ts": [ + 1.5207, + 1.52076, + 1.52075 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-13T23:04:39Z", + "avg_ns": 407186742611, + "stddev_ns": 3784594, + "avg_ts": 1.257408, + "stddev_ts": 1.2e-05, + "samples_ns": [ + 407190824894, + 407183511011, + 407185891929 + ], + "samples_ts": [ + 1.2574, + 1.25742, + 1.25741 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1365 + }, + { + "timestamp_utc": "2025-12-13T23:52:44.773391+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T23:25:02Z\",\n \"avg_ns\": 339628921678,\n \"stddev_ns\": 5170692,\n \"avg_ts\": 1.507528,\n \"stddev_ts\": 0.000023,\n \"samples_ns\": [ 339633614488, 339629663956, 339623486592 ],\n \"samples_ts\": [ 1.50751, 1.50752, 1.50755 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T23:47:41Z\",\n \"avg_ns\": 100798851932,\n \"stddev_ns\": 4001745,\n \"avg_ts\": 1.269856,\n \"stddev_ts\": 0.000050,\n \"samples_ns\": [ 100803202609, 100797986891, 100795366297 ],\n \"samples_ts\": [ 1.2698, 1.26987, 1.2699 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-13T23:25:02Z", + "avg_ns": 339628921678, + "stddev_ns": 5170692, + "avg_ts": 1.507528, + "stddev_ts": 2.3e-05, + "samples_ns": [ + 339633614488, + 339629663956, + 339623486592 + ], + "samples_ts": [ + 1.50751, + 1.50752, + 1.50755 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-13T23:47:41Z", + "avg_ns": 100798851932, + "stddev_ns": 4001745, + "avg_ts": 1.269856, + "stddev_ts": 5e-05, + "samples_ns": [ + 100803202609, + 100797986891, + 100795366297 + ], + "samples_ts": [ + 1.2698, + 1.26987, + 1.2699 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1366 + }, + { + "timestamp_utc": "2025-12-14T00:35:47.012879+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "2", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-13T23:52:46Z\",\n \"avg_ns\": 339688941598,\n \"stddev_ns\": 6749890,\n \"avg_ts\": 1.507261,\n \"stddev_ts\": 0.000030,\n \"samples_ns\": [ 339696651484, 339685634280, 339684539032 ],\n \"samples_ts\": [ 1.50723, 1.50728, 1.50728 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 2,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T00:15:24Z\",\n \"avg_ns\": 407089803655,\n \"stddev_ns\": 7097566,\n \"avg_ts\": 1.257708,\n \"stddev_ts\": 0.000022,\n \"samples_ns\": [ 407097772754, 407087208846, 407084429367 ],\n \"samples_ts\": [ 1.25768, 1.25772, 1.25772 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-13T23:52:46Z", + "avg_ns": 339688941598, + "stddev_ns": 6749890, + "avg_ts": 1.507261, + "stddev_ts": 3e-05, + "samples_ns": [ + 339696651484, + 339685634280, + 339684539032 + ], + "samples_ts": [ + 1.50723, + 1.50728, + 1.50728 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 2, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-14T00:15:24Z", + "avg_ns": 407089803655, + "stddev_ns": 7097566, + "avg_ts": 1.257708, + "stddev_ts": 2.2e-05, + "samples_ns": [ + 407097772754, + 407087208846, + 407084429367 + ], + "samples_ts": [ + 1.25768, + 1.25772, + 1.25772 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 2, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1367 + }, + { + "timestamp_utc": "2025-12-14T00:43:15.566363+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T00:35:48Z\",\n \"avg_ns\": 56784706632,\n \"stddev_ns\": 3591710,\n \"avg_ts\": 2.254128,\n \"stddev_ts\": 0.000142,\n \"samples_ns\": [ 56788037706, 56780914699, 56785167492 ],\n \"samples_ts\": [ 2.254, 2.25428, 2.25411 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T00:39:35Z\",\n \"avg_ns\": 73165478202,\n \"stddev_ns\": 990177448,\n \"avg_ts\": 1.749671,\n \"stddev_ts\": 0.023495,\n \"samples_ns\": [ 72597957294, 72589650778, 74308826535 ],\n \"samples_ts\": [ 1.76314, 1.76334, 1.72254 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T00:35:48Z", + "avg_ns": 56784706632, + "stddev_ns": 3591710, + "avg_ts": 2.254128, + "stddev_ts": 0.000142, + "samples_ns": [ + 56788037706, + 56780914699, + 56785167492 + ], + "samples_ts": [ + 2.254, + 2.25428, + 2.25411 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-14T00:39:35Z", + "avg_ns": 73165478202, + "stddev_ns": 990177448, + "avg_ts": 1.749671, + "stddev_ts": 0.023495, + "samples_ns": [ + 72597957294, + 72589650778, + 74308826535 + ], + "samples_ts": [ + 1.76314, + 1.76334, + 1.72254 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1368 + }, + { + "timestamp_utc": "2025-12-14T01:02:12.223434+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T00:43:16Z\",\n \"avg_ns\": 56796051530,\n \"stddev_ns\": 2495939,\n \"avg_ts\": 2.253678,\n \"stddev_ts\": 0.000098,\n \"samples_ns\": [ 56795697795, 56798682431, 56793774366 ],\n \"samples_ts\": [ 2.25369, 2.25357, 2.25377 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T00:47:04Z\",\n \"avg_ns\": 302493684992,\n \"stddev_ns\": 1478817767,\n \"avg_ts\": 1.692739,\n \"stddev_ts\": 0.019025,\n \"samples_ns\": [ 298593213109, 304447531966, 304440309902 ],\n \"samples_ts\": [ 1.71471, 1.68173, 1.68177 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T00:43:16Z", + "avg_ns": 56796051530, + "stddev_ns": 2495939, + "avg_ts": 2.253678, + "stddev_ts": 9.8e-05, + "samples_ns": [ + 56795697795, + 56798682431, + 56793774366 + ], + "samples_ts": [ + 2.25369, + 2.25357, + 2.25377 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-14T00:47:04Z", + "avg_ns": 302493684992, + "stddev_ns": 1478817767, + "avg_ts": 1.692739, + "stddev_ts": 0.019025, + "samples_ns": [ + 298593213109, + 304447531966, + 304440309902 + ], + "samples_ts": [ + 1.71471, + 1.68173, + 1.68177 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1369 + }, + { + "timestamp_utc": "2025-12-14T01:21:03.626928+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T01:02:13Z\",\n \"avg_ns\": 227803263278,\n \"stddev_ns\": 4832910,\n \"avg_ts\": 2.247553,\n \"stddev_ts\": 0.000048,\n \"samples_ns\": [ 227808483146, 227798944029, 227802362659 ],\n \"samples_ts\": [ 2.2475, 2.2476, 2.24756 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T01:17:24Z\",\n \"avg_ns\": 72740255244,\n \"stddev_ns\": 69555655,\n \"avg_ts\": 1.759687,\n \"stddev_ts\": 0.001682,\n \"samples_ns\": [ 72703306995, 72696971482, 72820487256 ],\n \"samples_ts\": [ 1.76058, 1.76073, 1.75775 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T01:02:13Z", + "avg_ns": 227803263278, + "stddev_ns": 4832910, + "avg_ts": 2.247553, + "stddev_ts": 4.8e-05, + "samples_ns": [ + 227808483146, + 227798944029, + 227802362659 + ], + "samples_ts": [ + 2.2475, + 2.2476, + 2.24756 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-14T01:17:24Z", + "avg_ns": 72740255244, + "stddev_ns": 69555655, + "avg_ts": 1.759687, + "stddev_ts": 0.001682, + "samples_ns": [ + 72703306995, + 72696971482, + 72820487256 + ], + "samples_ts": [ + 1.76058, + 1.76073, + 1.75775 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1370 + }, + { + "timestamp_utc": "2025-12-14T01:51:22.698712+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T01:21:04Z\",\n \"avg_ns\": 227824385282,\n \"stddev_ns\": 12225024,\n \"avg_ts\": 2.247345,\n \"stddev_ts\": 0.000120,\n \"samples_ns\": [ 227838471734, 227817976327, 227816707786 ],\n \"samples_ts\": [ 2.24721, 2.24741, 2.24742 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T01:36:16Z\",\n \"avg_ns\": 301921736551,\n \"stddev_ns\": 3798305279,\n \"avg_ts\": 1.695984,\n \"stddev_ts\": 0.021492,\n \"samples_ns\": [ 297535878715, 304097022466, 304132308472 ],\n \"samples_ts\": [ 1.7208, 1.68367, 1.68348 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T01:21:04Z", + "avg_ns": 227824385282, + "stddev_ns": 12225024, + "avg_ts": 2.247345, + "stddev_ts": 0.00012, + "samples_ns": [ + 227838471734, + 227817976327, + 227816707786 + ], + "samples_ts": [ + 2.24721, + 2.24741, + 2.24742 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-14T01:36:16Z", + "avg_ns": 301921736551, + "stddev_ns": 3798305279, + "avg_ts": 1.695984, + "stddev_ts": 0.021492, + "samples_ns": [ + 297535878715, + 304097022466, + 304132308472 + ], + "samples_ts": [ + 1.7208, + 1.68367, + 1.68348 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1371 + }, + { + "timestamp_utc": "2025-12-14T01:58:52.484296+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T01:51:23Z\",\n \"avg_ns\": 56785905745,\n \"stddev_ns\": 2433111,\n \"avg_ts\": 2.254080,\n \"stddev_ts\": 0.000096,\n \"samples_ns\": [ 56783346796, 56788160966, 56786209474 ],\n \"samples_ts\": [ 2.25418, 2.25399, 2.25407 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T01:55:11Z\",\n \"avg_ns\": 73563558704,\n \"stddev_ns\": 1366705816,\n \"avg_ts\": 1.740388,\n \"stddev_ts\": 0.032014,\n \"samples_ns\": [ 72615143112, 72945399557, 75130133445 ],\n \"samples_ts\": [ 1.76272, 1.75474, 1.70371 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T01:51:23Z", + "avg_ns": 56785905745, + "stddev_ns": 2433111, + "avg_ts": 2.25408, + "stddev_ts": 9.6e-05, + "samples_ns": [ + 56783346796, + 56788160966, + 56786209474 + ], + "samples_ts": [ + 2.25418, + 2.25399, + 2.25407 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-14T01:55:11Z", + "avg_ns": 73563558704, + "stddev_ns": 1366705816, + "avg_ts": 1.740388, + "stddev_ts": 0.032014, + "samples_ns": [ + 72615143112, + 72945399557, + 75130133445 + ], + "samples_ts": [ + 1.76272, + 1.75474, + 1.70371 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1372 + }, + { + "timestamp_utc": "2025-12-14T02:17:49.469957+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T01:58:53Z\",\n \"avg_ns\": 56782353016,\n \"stddev_ns\": 2349100,\n \"avg_ts\": 2.254221,\n \"stddev_ts\": 0.000092,\n \"samples_ns\": [ 56784529566, 56782625508, 56779903976 ],\n \"samples_ts\": [ 2.25414, 2.25421, 2.25432 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T02:02:40Z\",\n \"avg_ns\": 302614142096,\n \"stddev_ns\": 4236012511,\n \"avg_ts\": 1.692032,\n \"stddev_ts\": 0.016605,\n \"samples_ns\": [ 299204271436, 304323104494, 304315050359 ],\n \"samples_ts\": [ 1.71121, 1.68242, 1.68247 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T01:58:53Z", + "avg_ns": 56782353016, + "stddev_ns": 2349100, + "avg_ts": 2.254221, + "stddev_ts": 9.2e-05, + "samples_ns": [ + 56784529566, + 56782625508, + 56779903976 + ], + "samples_ts": [ + 2.25414, + 2.25421, + 2.25432 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-14T02:02:40Z", + "avg_ns": 302614142096, + "stddev_ns": 4236012511, + "avg_ts": 1.692032, + "stddev_ts": 0.016605, + "samples_ns": [ + 299204271436, + 304323104494, + 304315050359 + ], + "samples_ts": [ + 1.71121, + 1.68242, + 1.68247 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1373 + }, + { + "timestamp_utc": "2025-12-14T02:36:44.077441+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T02:17:50Z\",\n \"avg_ns\": 228432210971,\n \"stddev_ns\": 3906766,\n \"avg_ts\": 2.241365,\n \"stddev_ts\": 0.000038,\n \"samples_ns\": [ 228429586638, 228430381634, 228436664642 ],\n \"samples_ts\": [ 2.24139, 2.24138, 2.24132 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T02:33:04Z\",\n \"avg_ns\": 72966867247,\n \"stddev_ns\": 680893371,\n \"avg_ts\": 1.754322,\n \"stddev_ts\": 0.016283,\n \"samples_ns\": [ 72576871011, 72570643823, 73753086908 ],\n \"samples_ts\": [ 1.76365, 1.7638, 1.73552 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T02:17:50Z", + "avg_ns": 228432210971, + "stddev_ns": 3906766, + "avg_ts": 2.241365, + "stddev_ts": 3.8e-05, + "samples_ns": [ + 228429586638, + 228430381634, + 228436664642 + ], + "samples_ts": [ + 2.24139, + 2.24138, + 2.24132 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-14T02:33:04Z", + "avg_ns": 72966867247, + "stddev_ns": 680893371, + "avg_ts": 1.754322, + "stddev_ts": 0.016283, + "samples_ns": [ + 72576871011, + 72570643823, + 73753086908 + ], + "samples_ts": [ + 1.76365, + 1.7638, + 1.73552 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1374 + }, + { + "timestamp_utc": "2025-12-14T03:07:05.447367+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T02:36:45Z\",\n \"avg_ns\": 228394757986,\n \"stddev_ns\": 6165049,\n \"avg_ts\": 2.241733,\n \"stddev_ts\": 0.000060,\n \"samples_ns\": [ 228399009118, 228387710495, 228397554346 ],\n \"samples_ts\": [ 2.24169, 2.2418, 2.24171 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T02:51:58Z\",\n \"avg_ns\": 301929426390,\n \"stddev_ns\": 3929310093,\n \"avg_ts\": 1.695953,\n \"stddev_ts\": 0.022238,\n \"samples_ns\": [ 297392377150, 304168523910, 304227378110 ],\n \"samples_ts\": [ 1.72163, 1.68328, 1.68295 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T02:36:45Z", + "avg_ns": 228394757986, + "stddev_ns": 6165049, + "avg_ts": 2.241733, + "stddev_ts": 6e-05, + "samples_ns": [ + 228399009118, + 228387710495, + 228397554346 + ], + "samples_ts": [ + 2.24169, + 2.2418, + 2.24171 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-14T02:51:58Z", + "avg_ns": 301929426390, + "stddev_ns": 3929310093, + "avg_ts": 1.695953, + "stddev_ts": 0.022238, + "samples_ns": [ + 297392377150, + 304168523910, + 304227378110 + ], + "samples_ts": [ + 1.72163, + 1.68328, + 1.68295 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1375 + }, + { + "timestamp_utc": "2025-12-14T03:14:34.892359+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T03:07:06Z\",\n \"avg_ns\": 56791944996,\n \"stddev_ns\": 873286,\n \"avg_ts\": 2.253841,\n \"stddev_ts\": 0.000033,\n \"samples_ns\": [ 56791029635, 56792680926, 56792124428 ],\n \"samples_ts\": [ 2.25388, 2.25381, 2.25383 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T03:10:53Z\",\n \"avg_ns\": 73431916442,\n \"stddev_ns\": 3348872440,\n \"avg_ts\": 1.743536,\n \"stddev_ts\": 0.033142,\n \"samples_ns\": [ 72574626942, 72660408922, 75060713464 ],\n \"samples_ts\": [ 1.7637, 1.76162, 1.70529 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T03:07:06Z", + "avg_ns": 56791944996, + "stddev_ns": 873286, + "avg_ts": 2.253841, + "stddev_ts": 3.3e-05, + "samples_ns": [ + 56791029635, + 56792680926, + 56792124428 + ], + "samples_ts": [ + 2.25388, + 2.25381, + 2.25383 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-14T03:10:53Z", + "avg_ns": 73431916442, + "stddev_ns": 3348872440, + "avg_ts": 1.743536, + "stddev_ts": 0.033142, + "samples_ns": [ + 72574626942, + 72660408922, + 75060713464 + ], + "samples_ts": [ + 1.7637, + 1.76162, + 1.70529 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1376 + }, + { + "timestamp_utc": "2025-12-14T03:33:31.696744+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T03:14:36Z\",\n \"avg_ns\": 56780383837,\n \"stddev_ns\": 1783251,\n \"avg_ts\": 2.254300,\n \"stddev_ts\": 0.000070,\n \"samples_ns\": [ 56778614908, 56782149422, 56780387182 ],\n \"samples_ts\": [ 2.25437, 2.25423, 2.2543 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T03:18:23Z\",\n \"avg_ns\": 302566236278,\n \"stddev_ns\": 1189039714,\n \"avg_ts\": 1.692323,\n \"stddev_ts\": 0.018356,\n \"samples_ns\": [ 298800218677, 304444690171, 304453799987 ],\n \"samples_ts\": [ 1.71352, 1.68175, 1.6817 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T03:14:36Z", + "avg_ns": 56780383837, + "stddev_ns": 1783251, + "avg_ts": 2.2543, + "stddev_ts": 7e-05, + "samples_ns": [ + 56778614908, + 56782149422, + 56780387182 + ], + "samples_ts": [ + 2.25437, + 2.25423, + 2.2543 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-14T03:18:23Z", + "avg_ns": 302566236278, + "stddev_ns": 1189039714, + "avg_ts": 1.692323, + "stddev_ts": 0.018356, + "samples_ns": [ + 298800218677, + 304444690171, + 304453799987 + ], + "samples_ts": [ + 1.71352, + 1.68175, + 1.6817 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1377 + }, + { + "timestamp_utc": "2025-12-14T03:52:30.973330+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T03:33:32Z\",\n \"avg_ns\": 229530496896,\n \"stddev_ns\": 5497061,\n \"avg_ts\": 2.230640,\n \"stddev_ts\": 0.000053,\n \"samples_ns\": [ 229525184273, 229536161481, 229530144934 ],\n \"samples_ts\": [ 2.23069, 2.23059, 2.23064 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T03:48:51Z\",\n \"avg_ns\": 73055604556,\n \"stddev_ns\": 3113920790,\n \"avg_ts\": 1.752193,\n \"stddev_ts\": 0.016408,\n \"samples_ns\": [ 72658112378, 72658841994, 73849859297 ],\n \"samples_ts\": [ 1.76168, 1.76166, 1.73325 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T03:33:32Z", + "avg_ns": 229530496896, + "stddev_ns": 5497061, + "avg_ts": 2.23064, + "stddev_ts": 5.3e-05, + "samples_ns": [ + 229525184273, + 229536161481, + 229530144934 + ], + "samples_ts": [ + 2.23069, + 2.23059, + 2.23064 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-14T03:48:51Z", + "avg_ns": 73055604556, + "stddev_ns": 3113920790, + "avg_ts": 1.752193, + "stddev_ts": 0.016408, + "samples_ns": [ + 72658112378, + 72658841994, + 73849859297 + ], + "samples_ts": [ + 1.76168, + 1.76166, + 1.73325 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1378 + }, + { + "timestamp_utc": "2025-12-14T04:22:56.703961+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T03:52:32Z\",\n \"avg_ns\": 229556676284,\n \"stddev_ns\": 4342962,\n \"avg_ts\": 2.230386,\n \"stddev_ts\": 0.000042,\n \"samples_ns\": [ 229561197457, 229552663139, 229556168258 ],\n \"samples_ts\": [ 2.23034, 2.23043, 2.23039 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T04:07:50Z\",\n \"avg_ns\": 301834118645,\n \"stddev_ns\": 2242004709,\n \"avg_ts\": 1.696474,\n \"stddev_ts\": 0.021371,\n \"samples_ns\": [ 297475226314, 304013269099, 304013860523 ],\n \"samples_ts\": [ 1.72115, 1.68414, 1.68413 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T03:52:32Z", + "avg_ns": 229556676284, + "stddev_ns": 4342962, + "avg_ts": 2.230386, + "stddev_ts": 4.2e-05, + "samples_ns": [ + 229561197457, + 229552663139, + 229556168258 + ], + "samples_ts": [ + 2.23034, + 2.23043, + 2.23039 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-14T04:07:50Z", + "avg_ns": 301834118645, + "stddev_ns": 2242004709, + "avg_ts": 1.696474, + "stddev_ts": 0.021371, + "samples_ns": [ + 297475226314, + 304013269099, + 304013860523 + ], + "samples_ts": [ + 1.72115, + 1.68414, + 1.68413 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1379 + }, + { + "timestamp_utc": "2025-12-14T04:30:25.774389+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T04:22:57Z\",\n \"avg_ns\": 56788971169,\n \"stddev_ns\": 652727,\n \"avg_ts\": 2.253959,\n \"stddev_ts\": 0.000026,\n \"samples_ns\": [ 56788285575, 56789585120, 56789042812 ],\n \"samples_ts\": [ 2.25399, 2.25393, 2.25396 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T04:26:45Z\",\n \"avg_ns\": 73315896706,\n \"stddev_ns\": 1163378857,\n \"avg_ts\": 1.746160,\n \"stddev_ts\": 0.027457,\n \"samples_ns\": [ 72642178003, 72646263286, 74659248829 ],\n \"samples_ts\": [ 1.76206, 1.76196, 1.71446 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T04:22:57Z", + "avg_ns": 56788971169, + "stddev_ns": 652727, + "avg_ts": 2.253959, + "stddev_ts": 2.6e-05, + "samples_ns": [ + 56788285575, + 56789585120, + 56789042812 + ], + "samples_ts": [ + 2.25399, + 2.25393, + 2.25396 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-14T04:26:45Z", + "avg_ns": 73315896706, + "stddev_ns": 1163378857, + "avg_ts": 1.74616, + "stddev_ts": 0.027457, + "samples_ns": [ + 72642178003, + 72646263286, + 74659248829 + ], + "samples_ts": [ + 1.76206, + 1.76196, + 1.71446 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1380 + }, + { + "timestamp_utc": "2025-12-14T04:49:22.644262+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T04:30:27Z\",\n \"avg_ns\": 56784501868,\n \"stddev_ns\": 2052164,\n \"avg_ts\": 2.254136,\n \"stddev_ts\": 0.000081,\n \"samples_ns\": [ 56785828953, 56782154958, 56785521694 ],\n \"samples_ts\": [ 2.25408, 2.25423, 2.2541 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T04:34:14Z\",\n \"avg_ns\": 302577219228,\n \"stddev_ns\": 4202925026,\n \"avg_ts\": 1.692235,\n \"stddev_ts\": 0.016340,\n \"samples_ns\": [ 299222449455, 304273220505, 304235987724 ],\n \"samples_ts\": [ 1.7111, 1.6827, 1.6829 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T04:30:27Z", + "avg_ns": 56784501868, + "stddev_ns": 2052164, + "avg_ts": 2.254136, + "stddev_ts": 8.1e-05, + "samples_ns": [ + 56785828953, + 56782154958, + 56785521694 + ], + "samples_ts": [ + 2.25408, + 2.25423, + 2.2541 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-14T04:34:14Z", + "avg_ns": 302577219228, + "stddev_ns": 4202925026, + "avg_ts": 1.692235, + "stddev_ts": 0.01634, + "samples_ns": [ + 299222449455, + 304273220505, + 304235987724 + ], + "samples_ts": [ + 1.7111, + 1.6827, + 1.6829 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1381 + }, + { + "timestamp_utc": "2025-12-14T05:08:16.594364+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T04:49:23Z\",\n \"avg_ns\": 227848339311,\n \"stddev_ns\": 4236608,\n \"avg_ts\": 2.247109,\n \"stddev_ts\": 0.000042,\n \"samples_ns\": [ 227850442649, 227843492516, 227851082769 ],\n \"samples_ts\": [ 2.24709, 2.24716, 2.24708 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T05:04:35Z\",\n \"avg_ns\": 73516208721,\n \"stddev_ns\": 3310096619,\n \"avg_ts\": 1.741481,\n \"stddev_ts\": 0.030880,\n \"samples_ns\": [ 72640753706, 72877576005, 75030296454 ],\n \"samples_ts\": [ 1.7621, 1.75637, 1.70598 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T04:49:23Z", + "avg_ns": 227848339311, + "stddev_ns": 4236608, + "avg_ts": 2.247109, + "stddev_ts": 4.2e-05, + "samples_ns": [ + 227850442649, + 227843492516, + 227851082769 + ], + "samples_ts": [ + 2.24709, + 2.24716, + 2.24708 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-14T05:04:35Z", + "avg_ns": 73516208721, + "stddev_ns": 3310096619, + "avg_ts": 1.741481, + "stddev_ts": 0.03088, + "samples_ns": [ + 72640753706, + 72877576005, + 75030296454 + ], + "samples_ts": [ + 1.7621, + 1.75637, + 1.70598 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1382 + }, + { + "timestamp_utc": "2025-12-14T05:38:38.660462+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T05:08:17Z\",\n \"avg_ns\": 227895657503,\n \"stddev_ns\": 12368354,\n \"avg_ts\": 2.246642,\n \"stddev_ts\": 0.000122,\n \"samples_ns\": [ 227908885349, 227893657526, 227884429636 ],\n \"samples_ts\": [ 2.24651, 2.24666, 2.24675 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T05:23:29Z\",\n \"avg_ns\": 302826750950,\n \"stddev_ns\": 4113727510,\n \"avg_ts\": 1.690831,\n \"stddev_ts\": 0.015575,\n \"samples_ns\": [ 299622716789, 304426356545, 304431179516 ],\n \"samples_ts\": [ 1.70882, 1.68185, 1.68183 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T05:08:17Z", + "avg_ns": 227895657503, + "stddev_ns": 12368354, + "avg_ts": 2.246642, + "stddev_ts": 0.000122, + "samples_ns": [ + 227908885349, + 227893657526, + 227884429636 + ], + "samples_ts": [ + 2.24651, + 2.24666, + 2.24675 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-14T05:23:29Z", + "avg_ns": 302826750950, + "stddev_ns": 4113727510, + "avg_ts": 1.690831, + "stddev_ts": 0.015575, + "samples_ns": [ + 299622716789, + 304426356545, + 304431179516 + ], + "samples_ts": [ + 1.70882, + 1.68185, + 1.68183 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1383 + }, + { + "timestamp_utc": "2025-12-14T05:46:09.823133+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T05:38:39Z\",\n \"avg_ns\": 56790470081,\n \"stddev_ns\": 1128451,\n \"avg_ts\": 2.253899,\n \"stddev_ts\": 0.000045,\n \"samples_ns\": [ 56791770333, 56789893512, 56789746398 ],\n \"samples_ts\": [ 2.25385, 2.25392, 2.25393 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T05:42:27Z\",\n \"avg_ns\": 73999862890,\n \"stddev_ns\": 1255618952,\n \"avg_ts\": 1.730066,\n \"stddev_ts\": 0.029451,\n \"samples_ns\": [ 72673512806, 74155913149, 75170162715 ],\n \"samples_ts\": [ 1.7613, 1.72609, 1.7028 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T05:38:39Z", + "avg_ns": 56790470081, + "stddev_ns": 1128451, + "avg_ts": 2.253899, + "stddev_ts": 4.5e-05, + "samples_ns": [ + 56791770333, + 56789893512, + 56789746398 + ], + "samples_ts": [ + 2.25385, + 2.25392, + 2.25393 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-14T05:42:27Z", + "avg_ns": 73999862890, + "stddev_ns": 1255618952, + "avg_ts": 1.730066, + "stddev_ts": 0.029451, + "samples_ns": [ + 72673512806, + 74155913149, + 75170162715 + ], + "samples_ts": [ + 1.7613, + 1.72609, + 1.7028 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1384 + }, + { + "timestamp_utc": "2025-12-14T06:05:07.970346+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T05:46:11Z\",\n \"avg_ns\": 56800766371,\n \"stddev_ns\": 5303767,\n \"avg_ts\": 2.253491,\n \"stddev_ts\": 0.000210,\n \"samples_ns\": [ 56795424126, 56800855158, 56806019830 ],\n \"samples_ts\": [ 2.2537, 2.25349, 2.25328 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T05:49:58Z\",\n \"avg_ns\": 302968818908,\n \"stddev_ns\": 3954598232,\n \"avg_ts\": 1.690022,\n \"stddev_ts\": 0.014197,\n \"samples_ns\": [ 300044118700, 304422739530, 304439598496 ],\n \"samples_ts\": [ 1.70642, 1.68187, 1.68178 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T05:46:11Z", + "avg_ns": 56800766371, + "stddev_ns": 5303767, + "avg_ts": 2.253491, + "stddev_ts": 0.00021, + "samples_ns": [ + 56795424126, + 56800855158, + 56806019830 + ], + "samples_ts": [ + 2.2537, + 2.25349, + 2.25328 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-14T05:49:58Z", + "avg_ns": 302968818908, + "stddev_ns": 3954598232, + "avg_ts": 1.690022, + "stddev_ts": 0.014197, + "samples_ns": [ + 300044118700, + 304422739530, + 304439598496 + ], + "samples_ts": [ + 1.70642, + 1.68187, + 1.68178 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1385 + }, + { + "timestamp_utc": "2025-12-14T06:24:04.573116+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T06:05:09Z\",\n \"avg_ns\": 228399829996,\n \"stddev_ns\": 5686021,\n \"avg_ts\": 2.241683,\n \"stddev_ts\": 0.000056,\n \"samples_ns\": [ 228401909768, 228393396897, 228404183323 ],\n \"samples_ts\": [ 2.24166, 2.24175, 2.24164 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T06:20:22Z\",\n \"avg_ns\": 73665981332,\n \"stddev_ns\": 1292697719,\n \"avg_ts\": 1.737927,\n \"stddev_ts\": 0.030291,\n \"samples_ns\": [ 72598283111, 73296457527, 75103203360 ],\n \"samples_ts\": [ 1.76313, 1.74633, 1.70432 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T06:05:09Z", + "avg_ns": 228399829996, + "stddev_ns": 5686021, + "avg_ts": 2.241683, + "stddev_ts": 5.6e-05, + "samples_ns": [ + 228401909768, + 228393396897, + 228404183323 + ], + "samples_ts": [ + 2.24166, + 2.24175, + 2.24164 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-14T06:20:22Z", + "avg_ns": 73665981332, + "stddev_ns": 1292697719, + "avg_ts": 1.737927, + "stddev_ts": 0.030291, + "samples_ns": [ + 72598283111, + 73296457527, + 75103203360 + ], + "samples_ts": [ + 1.76313, + 1.74633, + 1.70432 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1386 + }, + { + "timestamp_utc": "2025-12-14T06:54:28.486124+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T06:24:05Z\",\n \"avg_ns\": 228363612011,\n \"stddev_ns\": 8038215,\n \"avg_ts\": 2.242038,\n \"stddev_ts\": 0.000079,\n \"samples_ns\": [ 228354468868, 228366975953, 228369391214 ],\n \"samples_ts\": [ 2.24213, 2.24201, 2.24198 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T06:39:19Z\",\n \"avg_ns\": 302810214071,\n \"stddev_ns\": 2553125817,\n \"avg_ts\": 1.690909,\n \"stddev_ts\": 0.014327,\n \"samples_ns\": [ 299862121835, 304288166904, 304280353475 ],\n \"samples_ts\": [ 1.70745, 1.68262, 1.68266 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T06:24:05Z", + "avg_ns": 228363612011, + "stddev_ns": 8038215, + "avg_ts": 2.242038, + "stddev_ts": 7.9e-05, + "samples_ns": [ + 228354468868, + 228366975953, + 228369391214 + ], + "samples_ts": [ + 2.24213, + 2.24201, + 2.24198 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-14T06:39:19Z", + "avg_ns": 302810214071, + "stddev_ns": 2553125817, + "avg_ts": 1.690909, + "stddev_ts": 0.014327, + "samples_ns": [ + 299862121835, + 304288166904, + 304280353475 + ], + "samples_ts": [ + 1.70745, + 1.68262, + 1.68266 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1387 + }, + { + "timestamp_utc": "2025-12-14T07:01:57.373982+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T06:54:29Z\",\n \"avg_ns\": 56791748394,\n \"stddev_ns\": 1803454,\n \"avg_ts\": 2.253849,\n \"stddev_ts\": 0.000072,\n \"samples_ns\": [ 56789888050, 56793488991, 56791868141 ],\n \"samples_ts\": [ 2.25392, 2.25378, 2.25384 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T06:58:16Z\",\n \"avg_ns\": 73256201827,\n \"stddev_ns\": 1090713284,\n \"avg_ts\": 1.747548,\n \"stddev_ts\": 0.025798,\n \"samples_ns\": [ 72610365029, 72642730034, 74515510418 ],\n \"samples_ts\": [ 1.76283, 1.76205, 1.71776 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T06:54:29Z", + "avg_ns": 56791748394, + "stddev_ns": 1803454, + "avg_ts": 2.253849, + "stddev_ts": 7.2e-05, + "samples_ns": [ + 56789888050, + 56793488991, + 56791868141 + ], + "samples_ts": [ + 2.25392, + 2.25378, + 2.25384 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-14T06:58:16Z", + "avg_ns": 73256201827, + "stddev_ns": 1090713284, + "avg_ts": 1.747548, + "stddev_ts": 0.025798, + "samples_ns": [ + 72610365029, + 72642730034, + 74515510418 + ], + "samples_ts": [ + 1.76283, + 1.76205, + 1.71776 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1388 + }, + { + "timestamp_utc": "2025-12-14T07:20:52.474816+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T07:01:58Z\",\n \"avg_ns\": 56783261179,\n \"stddev_ns\": 1567923,\n \"avg_ts\": 2.254185,\n \"stddev_ts\": 0.000061,\n \"samples_ns\": [ 56783299403, 56781711146, 56784772990 ],\n \"samples_ts\": [ 2.25418, 2.25425, 2.25413 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T07:05:45Z\",\n \"avg_ns\": 301991560945,\n \"stddev_ns\": 3688973646,\n \"avg_ts\": 1.695581,\n \"stddev_ts\": 0.020859,\n \"samples_ns\": [ 297732064952, 304088951873, 304153666010 ],\n \"samples_ts\": [ 1.71967, 1.68372, 1.68336 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T07:01:58Z", + "avg_ns": 56783261179, + "stddev_ns": 1567923, + "avg_ts": 2.254185, + "stddev_ts": 6.1e-05, + "samples_ns": [ + 56783299403, + 56781711146, + 56784772990 + ], + "samples_ts": [ + 2.25418, + 2.25425, + 2.25413 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-14T07:05:45Z", + "avg_ns": 301991560945, + "stddev_ns": 3688973646, + "avg_ts": 1.695581, + "stddev_ts": 0.020859, + "samples_ns": [ + 297732064952, + 304088951873, + 304153666010 + ], + "samples_ts": [ + 1.71967, + 1.68372, + 1.68336 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1389 + }, + { + "timestamp_utc": "2025-12-14T07:39:51.214939+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T07:20:53Z\",\n \"avg_ns\": 229431557340,\n \"stddev_ns\": 152446986,\n \"avg_ts\": 2.231603,\n \"stddev_ts\": 0.001483,\n \"samples_ns\": [ 229255527361, 229519982967, 229519161692 ],\n \"samples_ts\": [ 2.23332, 2.23074, 2.23075 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T07:36:11Z\",\n \"avg_ns\": 72981969825,\n \"stddev_ns\": 511553427,\n \"avg_ts\": 1.753915,\n \"stddev_ts\": 0.012244,\n \"samples_ns\": [ 72692135301, 72681147563, 73572626613 ],\n \"samples_ts\": [ 1.76085, 1.76112, 1.73978 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T07:20:53Z", + "avg_ns": 229431557340, + "stddev_ns": 152446986, + "avg_ts": 2.231603, + "stddev_ts": 0.001483, + "samples_ns": [ + 229255527361, + 229519982967, + 229519161692 + ], + "samples_ts": [ + 2.23332, + 2.23074, + 2.23075 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-14T07:36:11Z", + "avg_ns": 72981969825, + "stddev_ns": 511553427, + "avg_ts": 1.753915, + "stddev_ts": 0.012244, + "samples_ns": [ + 72692135301, + 72681147563, + 73572626613 + ], + "samples_ts": [ + 1.76085, + 1.76112, + 1.73978 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1390 + }, + { + "timestamp_utc": "2025-12-14T08:10:16.343833+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T07:39:52Z\",\n \"avg_ns\": 229560573790,\n \"stddev_ns\": 4582457,\n \"avg_ts\": 2.230348,\n \"stddev_ts\": 0.000044,\n \"samples_ns\": [ 229555312246, 229563284988, 229563124137 ],\n \"samples_ts\": [ 2.2304, 2.23032, 2.23032 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T07:55:10Z\",\n \"avg_ns\": 301624696817,\n \"stddev_ns\": 1101346445,\n \"avg_ts\": 1.697720,\n \"stddev_ts\": 0.025170,\n \"samples_ns\": [ 296504839475, 304182877333, 304186373645 ],\n \"samples_ts\": [ 1.72678, 1.6832, 1.68318 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T07:39:52Z", + "avg_ns": 229560573790, + "stddev_ns": 4582457, + "avg_ts": 2.230348, + "stddev_ts": 4.4e-05, + "samples_ns": [ + 229555312246, + 229563284988, + 229563124137 + ], + "samples_ts": [ + 2.2304, + 2.23032, + 2.23032 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-14T07:55:10Z", + "avg_ns": 301624696817, + "stddev_ns": 1101346445, + "avg_ts": 1.69772, + "stddev_ts": 0.02517, + "samples_ns": [ + 296504839475, + 304182877333, + 304186373645 + ], + "samples_ts": [ + 1.72678, + 1.6832, + 1.68318 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1391 + }, + { + "timestamp_utc": "2025-12-14T08:17:43.562325+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T08:10:17Z\",\n \"avg_ns\": 56806008548,\n \"stddev_ns\": 2113610,\n \"avg_ts\": 2.253283,\n \"stddev_ts\": 0.000083,\n \"samples_ns\": [ 56807135082, 56803585514, 56807305049 ],\n \"samples_ts\": [ 2.25324, 2.25338, 2.25323 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T08:14:04Z\",\n \"avg_ns\": 72671773288,\n \"stddev_ns\": 197558268,\n \"avg_ts\": 1.761353,\n \"stddev_ts\": 0.004781,\n \"samples_ns\": [ 72561984419, 72553494400, 72899841046 ],\n \"samples_ts\": [ 1.76401, 1.76422, 1.75583 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T08:10:17Z", + "avg_ns": 56806008548, + "stddev_ns": 2113610, + "avg_ts": 2.253283, + "stddev_ts": 8.3e-05, + "samples_ns": [ + 56807135082, + 56803585514, + 56807305049 + ], + "samples_ts": [ + 2.25324, + 2.25338, + 2.25323 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-14T08:14:04Z", + "avg_ns": 72671773288, + "stddev_ns": 197558268, + "avg_ts": 1.761353, + "stddev_ts": 0.004781, + "samples_ns": [ + 72561984419, + 72553494400, + 72899841046 + ], + "samples_ts": [ + 1.76401, + 1.76422, + 1.75583 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1392 + }, + { + "timestamp_utc": "2025-12-14T08:36:37.215710+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T08:17:44Z\",\n \"avg_ns\": 56781843300,\n \"stddev_ns\": 1840910,\n \"avg_ts\": 2.254242,\n \"stddev_ts\": 0.000072,\n \"samples_ns\": [ 56783359175, 56782353733, 56779816993 ],\n \"samples_ts\": [ 2.25418, 2.25422, 2.25432 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T08:21:31Z\",\n \"avg_ns\": 301561190854,\n \"stddev_ns\": 1972214947,\n \"avg_ts\": 1.698112,\n \"stddev_ts\": 0.026856,\n \"samples_ns\": [ 296103919387, 304285706367, 304293946808 ],\n \"samples_ts\": [ 1.72912, 1.68263, 1.68258 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T08:17:44Z", + "avg_ns": 56781843300, + "stddev_ns": 1840910, + "avg_ts": 2.254242, + "stddev_ts": 7.2e-05, + "samples_ns": [ + 56783359175, + 56782353733, + 56779816993 + ], + "samples_ts": [ + 2.25418, + 2.25422, + 2.25432 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-14T08:21:31Z", + "avg_ns": 301561190854, + "stddev_ns": 1972214947, + "avg_ts": 1.698112, + "stddev_ts": 0.026856, + "samples_ns": [ + 296103919387, + 304285706367, + 304293946808 + ], + "samples_ts": [ + 1.72912, + 1.68263, + 1.68258 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1393 + }, + { + "timestamp_utc": "2025-12-14T08:55:29.245485+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T08:36:38Z\",\n \"avg_ns\": 228010988156,\n \"stddev_ns\": 9592636,\n \"avg_ts\": 2.245506,\n \"stddev_ts\": 0.000094,\n \"samples_ns\": [ 228019110523, 228013414116, 228000439831 ],\n \"samples_ts\": [ 2.24543, 2.24548, 2.24561 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T08:51:50Z\",\n \"avg_ns\": 72651861319,\n \"stddev_ns\": 28335365,\n \"avg_ts\": 1.761827,\n \"stddev_ts\": 0.000687,\n \"samples_ns\": [ 72644790935, 72627733271, 72683059753 ],\n \"samples_ts\": [ 1.762, 1.76241, 1.76107 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T08:36:38Z", + "avg_ns": 228010988156, + "stddev_ns": 9592636, + "avg_ts": 2.245506, + "stddev_ts": 9.4e-05, + "samples_ns": [ + 228019110523, + 228013414116, + 228000439831 + ], + "samples_ts": [ + 2.24543, + 2.24548, + 2.24561 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-14T08:51:50Z", + "avg_ns": 72651861319, + "stddev_ns": 28335365, + "avg_ts": 1.761827, + "stddev_ts": 0.000687, + "samples_ns": [ + 72644790935, + 72627733271, + 72683059753 + ], + "samples_ts": [ + 1.762, + 1.76241, + 1.76107 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1394 + }, + { + "timestamp_utc": "2025-12-14T09:25:46.197847+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T08:55:30Z\",\n \"avg_ns\": 227809010829,\n \"stddev_ns\": 9499739,\n \"avg_ts\": 2.247497,\n \"stddev_ts\": 0.000094,\n \"samples_ns\": [ 227798650537, 227817275696, 227811106255 ],\n \"samples_ts\": [ 2.2476, 2.24742, 2.24748 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T09:10:41Z\",\n \"avg_ns\": 301234227819,\n \"stddev_ns\": 4154477490,\n \"avg_ts\": 1.700008,\n \"stddev_ts\": 0.029331,\n \"samples_ns\": [ 295292808659, 304292736747, 304117138052 ],\n \"samples_ts\": [ 1.73387, 1.68259, 1.68356 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T08:55:30Z", + "avg_ns": 227809010829, + "stddev_ns": 9499739, + "avg_ts": 2.247497, + "stddev_ts": 9.4e-05, + "samples_ns": [ + 227798650537, + 227817275696, + 227811106255 + ], + "samples_ts": [ + 2.2476, + 2.24742, + 2.24748 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-14T09:10:41Z", + "avg_ns": 301234227819, + "stddev_ns": 4154477490, + "avg_ts": 1.700008, + "stddev_ts": 0.029331, + "samples_ns": [ + 295292808659, + 304292736747, + 304117138052 + ], + "samples_ts": [ + 1.73387, + 1.68259, + 1.68356 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1395 + }, + { + "timestamp_utc": "2025-12-14T09:33:14.244994+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T09:25:47Z\",\n \"avg_ns\": 56785759814,\n \"stddev_ns\": 314635,\n \"avg_ts\": 2.254086,\n \"stddev_ts\": 0.000012,\n \"samples_ns\": [ 56785469722, 56785715439, 56786094281 ],\n \"samples_ts\": [ 2.2541, 2.25409, 2.25407 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T09:29:34Z\",\n \"avg_ns\": 72969603496,\n \"stddev_ns\": 534483824,\n \"avg_ts\": 1.754218,\n \"stddev_ts\": 0.012795,\n \"samples_ns\": [ 72662915587, 72659126601, 73586768301 ],\n \"samples_ts\": [ 1.76156, 1.76165, 1.73944 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T09:25:47Z", + "avg_ns": 56785759814, + "stddev_ns": 314635, + "avg_ts": 2.254086, + "stddev_ts": 1.2e-05, + "samples_ns": [ + 56785469722, + 56785715439, + 56786094281 + ], + "samples_ts": [ + 2.2541, + 2.25409, + 2.25407 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-14T09:29:34Z", + "avg_ns": 72969603496, + "stddev_ns": 534483824, + "avg_ts": 1.754218, + "stddev_ts": 0.012795, + "samples_ns": [ + 72662915587, + 72659126601, + 73586768301 + ], + "samples_ts": [ + 1.76156, + 1.76165, + 1.73944 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1396 + }, + { + "timestamp_utc": "2025-12-14T09:52:08.965191+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T09:33:15Z\",\n \"avg_ns\": 56763657874,\n \"stddev_ns\": 5096205,\n \"avg_ts\": 2.254964,\n \"stddev_ts\": 0.000202,\n \"samples_ns\": [ 56767230482, 56765921096, 56757822044 ],\n \"samples_ts\": [ 2.25482, 2.25487, 2.2552 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T09:37:02Z\",\n \"avg_ns\": 301891406553,\n \"stddev_ns\": 4041523848,\n \"avg_ts\": 1.696178,\n \"stddev_ts\": 0.022884,\n \"samples_ns\": [ 297224657124, 304223465576, 304226096961 ],\n \"samples_ts\": [ 1.7226, 1.68297, 1.68296 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T09:33:15Z", + "avg_ns": 56763657874, + "stddev_ns": 5096205, + "avg_ts": 2.254964, + "stddev_ts": 0.000202, + "samples_ns": [ + 56767230482, + 56765921096, + 56757822044 + ], + "samples_ts": [ + 2.25482, + 2.25487, + 2.2552 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-14T09:37:02Z", + "avg_ns": 301891406553, + "stddev_ns": 4041523848, + "avg_ts": 1.696178, + "stddev_ts": 0.022884, + "samples_ns": [ + 297224657124, + 304223465576, + 304226096961 + ], + "samples_ts": [ + 1.7226, + 1.68297, + 1.68296 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1397 + }, + { + "timestamp_utc": "2025-12-14T10:11:02.581045+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T09:52:10Z\",\n \"avg_ns\": 228356576929,\n \"stddev_ns\": 10920963,\n \"avg_ts\": 2.242108,\n \"stddev_ts\": 0.000107,\n \"samples_ns\": [ 228355453751, 228346271461, 228368005576 ],\n \"samples_ts\": [ 2.24212, 2.24221, 2.242 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T10:07:23Z\",\n \"avg_ns\": 72721843514,\n \"stddev_ns\": 86169022,\n \"avg_ts\": 1.760133,\n \"stddev_ts\": 0.002084,\n \"samples_ns\": [ 72673913766, 72670296256, 72821320521 ],\n \"samples_ts\": [ 1.76129, 1.76138, 1.75773 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T09:52:10Z", + "avg_ns": 228356576929, + "stddev_ns": 10920963, + "avg_ts": 2.242108, + "stddev_ts": 0.000107, + "samples_ns": [ + 228355453751, + 228346271461, + 228368005576 + ], + "samples_ts": [ + 2.24212, + 2.24221, + 2.242 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-14T10:07:23Z", + "avg_ns": 72721843514, + "stddev_ns": 86169022, + "avg_ts": 1.760133, + "stddev_ts": 0.002084, + "samples_ns": [ + 72673913766, + 72670296256, + 72821320521 + ], + "samples_ts": [ + 1.76129, + 1.76138, + 1.75773 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1398 + }, + { + "timestamp_utc": "2025-12-14T10:41:22.376248+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T10:11:03Z\",\n \"avg_ns\": 228376035081,\n \"stddev_ns\": 3362894,\n \"avg_ts\": 2.241916,\n \"stddev_ts\": 0.000033,\n \"samples_ns\": [ 228374134562, 228379917930, 228374052751 ],\n \"samples_ts\": [ 2.24194, 2.24188, 2.24194 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T10:26:17Z\",\n \"avg_ns\": 301424510275,\n \"stddev_ns\": 1762982509,\n \"avg_ts\": 1.698872,\n \"stddev_ts\": 0.026402,\n \"samples_ns\": [ 296063581113, 304118402211, 304091547501 ],\n \"samples_ts\": [ 1.72936, 1.68355, 1.6837 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T10:11:03Z", + "avg_ns": 228376035081, + "stddev_ns": 3362894, + "avg_ts": 2.241916, + "stddev_ts": 3.3e-05, + "samples_ns": [ + 228374134562, + 228379917930, + 228374052751 + ], + "samples_ts": [ + 2.24194, + 2.24188, + 2.24194 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-14T10:26:17Z", + "avg_ns": 301424510275, + "stddev_ns": 1762982509, + "avg_ts": 1.698872, + "stddev_ts": 0.026402, + "samples_ns": [ + 296063581113, + 304118402211, + 304091547501 + ], + "samples_ts": [ + 1.72936, + 1.68355, + 1.6837 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1399 + }, + { + "timestamp_utc": "2025-12-14T10:48:49.570735+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T10:41:23Z\",\n \"avg_ns\": 56799577023,\n \"stddev_ns\": 678496,\n \"avg_ts\": 2.253538,\n \"stddev_ts\": 0.000025,\n \"samples_ns\": [ 56800170736, 56798907089, 56799653245 ],\n \"samples_ts\": [ 2.25351, 2.25356, 2.25353 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T10:45:10Z\",\n \"avg_ns\": 72676396439,\n \"stddev_ns\": 197235821,\n \"avg_ts\": 1.761241,\n \"stddev_ts\": 0.004772,\n \"samples_ns\": [ 72556040361, 72569130056, 72904018902 ],\n \"samples_ts\": [ 1.76415, 1.76384, 1.75573 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T10:41:23Z", + "avg_ns": 56799577023, + "stddev_ns": 678496, + "avg_ts": 2.253538, + "stddev_ts": 2.5e-05, + "samples_ns": [ + 56800170736, + 56798907089, + 56799653245 + ], + "samples_ts": [ + 2.25351, + 2.25356, + 2.25353 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-14T10:45:10Z", + "avg_ns": 72676396439, + "stddev_ns": 197235821, + "avg_ts": 1.761241, + "stddev_ts": 0.004772, + "samples_ns": [ + 72556040361, + 72569130056, + 72904018902 + ], + "samples_ts": [ + 1.76415, + 1.76384, + 1.75573 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1400 + }, + { + "timestamp_utc": "2025-12-14T11:07:43.712939+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T10:48:50Z\",\n \"avg_ns\": 56780579737,\n \"stddev_ns\": 2787815,\n \"avg_ts\": 2.254292,\n \"stddev_ts\": 0.000110,\n \"samples_ns\": [ 56783605202, 56779957667, 56778176344 ],\n \"samples_ts\": [ 2.25417, 2.25432, 2.25439 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T10:52:38Z\",\n \"avg_ns\": 301670070133,\n \"stddev_ns\": 4248960664,\n \"avg_ts\": 1.697445,\n \"stddev_ts\": 0.024104,\n \"samples_ns\": [ 296763794868, 304126908532, 304119507000 ],\n \"samples_ts\": [ 1.72528, 1.68351, 1.68355 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T10:48:50Z", + "avg_ns": 56780579737, + "stddev_ns": 2787815, + "avg_ts": 2.254292, + "stddev_ts": 0.00011, + "samples_ns": [ + 56783605202, + 56779957667, + 56778176344 + ], + "samples_ts": [ + 2.25417, + 2.25432, + 2.25439 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-14T10:52:38Z", + "avg_ns": 301670070133, + "stddev_ns": 4248960664, + "avg_ts": 1.697445, + "stddev_ts": 0.024104, + "samples_ns": [ + 296763794868, + 304126908532, + 304119507000 + ], + "samples_ts": [ + 1.72528, + 1.68351, + 1.68355 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1401 + }, + { + "timestamp_utc": "2025-12-14T11:26:41.546344+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T11:07:44Z\",\n \"avg_ns\": 229483648501,\n \"stddev_ns\": 6046234,\n \"avg_ts\": 2.231096,\n \"stddev_ts\": 0.000059,\n \"samples_ns\": [ 229490478361, 229479075027, 229481392116 ],\n \"samples_ts\": [ 2.23103, 2.23114, 2.23112 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T11:23:02Z\",\n \"avg_ns\": 72630981511,\n \"stddev_ns\": 50372368,\n \"avg_ts\": 1.762334,\n \"stddev_ts\": 0.001222,\n \"samples_ns\": [ 72609730777, 72594716791, 72688496965 ],\n \"samples_ts\": [ 1.76285, 1.76321, 1.76094 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T11:07:44Z", + "avg_ns": 229483648501, + "stddev_ns": 6046234, + "avg_ts": 2.231096, + "stddev_ts": 5.9e-05, + "samples_ns": [ + 229490478361, + 229479075027, + 229481392116 + ], + "samples_ts": [ + 2.23103, + 2.23114, + 2.23112 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-14T11:23:02Z", + "avg_ns": 72630981511, + "stddev_ns": 50372368, + "avg_ts": 1.762334, + "stddev_ts": 0.001222, + "samples_ns": [ + 72609730777, + 72594716791, + 72688496965 + ], + "samples_ts": [ + 1.76285, + 1.76321, + 1.76094 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1402 + }, + { + "timestamp_utc": "2025-12-14T11:57:06.266044+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "3", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T11:26:42Z\",\n \"avg_ns\": 229538937365,\n \"stddev_ns\": 4057356,\n \"avg_ts\": 2.230558,\n \"stddev_ts\": 0.000039,\n \"samples_ns\": [ 229537778690, 229535614690, 229543418716 ],\n \"samples_ts\": [ 2.23057, 2.23059, 2.23051 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 3,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T11:42:01Z\",\n \"avg_ns\": 301495418992,\n \"stddev_ns\": 1866399833,\n \"avg_ts\": 1.698477,\n \"stddev_ts\": 0.026620,\n \"samples_ns\": [ 296088006349, 304188709561, 304209541067 ],\n \"samples_ts\": [ 1.72922, 1.68317, 1.68305 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T11:26:42Z", + "avg_ns": 229538937365, + "stddev_ns": 4057356, + "avg_ts": 2.230558, + "stddev_ts": 3.9e-05, + "samples_ns": [ + 229537778690, + 229535614690, + 229543418716 + ], + "samples_ts": [ + 2.23057, + 2.23059, + 2.23051 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 3, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-14T11:42:01Z", + "avg_ns": 301495418992, + "stddev_ns": 1866399833, + "avg_ts": 1.698477, + "stddev_ts": 0.02662, + "samples_ns": [ + 296088006349, + 304188709561, + 304209541067 + ], + "samples_ts": [ + 1.72922, + 1.68317, + 1.68305 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 3, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1403 + }, + { + "timestamp_utc": "2025-12-14T12:03:28.818264+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T11:57:07Z\",\n \"avg_ns\": 43024608159,\n \"stddev_ns\": 10570457,\n \"avg_ts\": 2.975042,\n \"stddev_ts\": 0.000731,\n \"samples_ns\": [ 43028602419, 43032595165, 43012626895 ],\n \"samples_ts\": [ 2.97477, 2.97449, 2.97587 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T11:59:59Z\",\n \"avg_ns\": 69506523327,\n \"stddev_ns\": 90362484,\n \"avg_ts\": 1.841556,\n \"stddev_ts\": 0.002396,\n \"samples_ns\": [ 69402402474, 69564452747, 69552714761 ],\n \"samples_ts\": [ 1.84432, 1.84002, 1.84033 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T11:57:07Z", + "avg_ns": 43024608159, + "stddev_ns": 10570457, + "avg_ts": 2.975042, + "stddev_ts": 0.000731, + "samples_ns": [ + 43028602419, + 43032595165, + 43012626895 + ], + "samples_ts": [ + 2.97477, + 2.97449, + 2.97587 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-14T11:59:59Z", + "avg_ns": 69506523327, + "stddev_ns": 90362484, + "avg_ts": 1.841556, + "stddev_ts": 0.002396, + "samples_ns": [ + 69402402474, + 69564452747, + 69552714761 + ], + "samples_ts": [ + 1.84432, + 1.84002, + 1.84033 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1404 + }, + { + "timestamp_utc": "2025-12-14T12:20:25.871898+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T12:03:30Z\",\n \"avg_ns\": 43015151620,\n \"stddev_ns\": 7773033,\n \"avg_ts\": 2.975696,\n \"stddev_ts\": 0.000538,\n \"samples_ns\": [ 43021264226, 43017783382, 43006407253 ],\n \"samples_ts\": [ 2.97527, 2.97551, 2.9763 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T12:06:22Z\",\n \"avg_ns\": 281012671982,\n \"stddev_ns\": 387981159,\n \"avg_ts\": 1.821984,\n \"stddev_ts\": 0.002515,\n \"samples_ns\": [ 280655407749, 281425399206, 280957208993 ],\n \"samples_ts\": [ 1.8243, 1.81931, 1.82234 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T12:03:30Z", + "avg_ns": 43015151620, + "stddev_ns": 7773033, + "avg_ts": 2.975696, + "stddev_ts": 0.000538, + "samples_ns": [ + 43021264226, + 43017783382, + 43006407253 + ], + "samples_ts": [ + 2.97527, + 2.97551, + 2.9763 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-14T12:06:22Z", + "avg_ns": 281012671982, + "stddev_ns": 387981159, + "avg_ts": 1.821984, + "stddev_ts": 0.002515, + "samples_ns": [ + 280655407749, + 281425399206, + 280957208993 + ], + "samples_ts": [ + 1.8243, + 1.81931, + 1.82234 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1405 + }, + { + "timestamp_utc": "2025-12-14T12:35:26.397398+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T12:20:27Z\",\n \"avg_ns\": 172576820553,\n \"stddev_ns\": 5543443,\n \"avg_ts\": 2.966795,\n \"stddev_ts\": 0.000095,\n \"samples_ns\": [ 172571131204, 172582205551, 172577124904 ],\n \"samples_ts\": [ 2.96689, 2.9667, 2.96679 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T12:31:57Z\",\n \"avg_ns\": 69381420608,\n \"stddev_ns\": 69024869,\n \"avg_ts\": 1.844876,\n \"stddev_ts\": 0.001836,\n \"samples_ns\": [ 69303343497, 69434330216, 69406588111 ],\n \"samples_ts\": [ 1.84695, 1.84347, 1.84421 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T12:20:27Z", + "avg_ns": 172576820553, + "stddev_ns": 5543443, + "avg_ts": 2.966795, + "stddev_ts": 9.5e-05, + "samples_ns": [ + 172571131204, + 172582205551, + 172577124904 + ], + "samples_ts": [ + 2.96689, + 2.9667, + 2.96679 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-14T12:31:57Z", + "avg_ns": 69381420608, + "stddev_ns": 69024869, + "avg_ts": 1.844876, + "stddev_ts": 0.001836, + "samples_ns": [ + 69303343497, + 69434330216, + 69406588111 + ], + "samples_ts": [ + 1.84695, + 1.84347, + 1.84421 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1406 + }, + { + "timestamp_utc": "2025-12-14T13:01:01.975684+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T12:35:27Z\",\n \"avg_ns\": 172586715089,\n \"stddev_ns\": 24010304,\n \"avg_ts\": 2.966625,\n \"stddev_ts\": 0.000413,\n \"samples_ns\": [ 172578611278, 172567809026, 172613724964 ],\n \"samples_ts\": [ 2.96676, 2.96695, 2.96616 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T12:46:58Z\",\n \"avg_ns\": 281075401937,\n \"stddev_ns\": 206927146,\n \"avg_ts\": 1.821576,\n \"stddev_ts\": 0.001341,\n \"samples_ns\": [ 281246868151, 281133781102, 280845556558 ],\n \"samples_ts\": [ 1.82046, 1.8212, 1.82307 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T12:35:27Z", + "avg_ns": 172586715089, + "stddev_ns": 24010304, + "avg_ts": 2.966625, + "stddev_ts": 0.000413, + "samples_ns": [ + 172578611278, + 172567809026, + 172613724964 + ], + "samples_ts": [ + 2.96676, + 2.96695, + 2.96616 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-14T12:46:58Z", + "avg_ns": 281075401937, + "stddev_ns": 206927146, + "avg_ts": 1.821576, + "stddev_ts": 0.001341, + "samples_ns": [ + 281246868151, + 281133781102, + 280845556558 + ], + "samples_ts": [ + 1.82046, + 1.8212, + 1.82307 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 512, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1407 + }, + { + "timestamp_utc": "2025-12-14T13:07:24.669640+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T13:01:03Z\",\n \"avg_ns\": 43013159669,\n \"stddev_ns\": 3734203,\n \"avg_ts\": 2.975833,\n \"stddev_ts\": 0.000258,\n \"samples_ns\": [ 43012258912, 43017250066, 43009970031 ],\n \"samples_ts\": [ 2.9759, 2.97555, 2.97605 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T13:03:55Z\",\n \"avg_ns\": 69546374160,\n \"stddev_ns\": 46103625,\n \"avg_ts\": 1.840499,\n \"stddev_ts\": 0.001220,\n \"samples_ns\": [ 69531389167, 69509628748, 69598104567 ],\n \"samples_ts\": [ 1.8409, 1.84147, 1.83913 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T13:01:03Z", + "avg_ns": 43013159669, + "stddev_ns": 3734203, + "avg_ts": 2.975833, + "stddev_ts": 0.000258, + "samples_ns": [ + 43012258912, + 43017250066, + 43009970031 + ], + "samples_ts": [ + 2.9759, + 2.97555, + 2.97605 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-14T13:03:55Z", + "avg_ns": 69546374160, + "stddev_ns": 46103625, + "avg_ts": 1.840499, + "stddev_ts": 0.00122, + "samples_ns": [ + 69531389167, + 69509628748, + 69598104567 + ], + "samples_ts": [ + 1.8409, + 1.84147, + 1.83913 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1408 + }, + { + "timestamp_utc": "2025-12-14T13:24:21.874648+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T13:07:25Z\",\n \"avg_ns\": 43022002638,\n \"stddev_ns\": 5667767,\n \"avg_ts\": 2.975222,\n \"stddev_ts\": 0.000392,\n \"samples_ns\": [ 43017096216, 43020705001, 43028206697 ],\n \"samples_ts\": [ 2.97556, 2.97531, 2.97479 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T13:10:18Z\",\n \"avg_ns\": 281049500647,\n \"stddev_ns\": 3121251656,\n \"avg_ts\": 1.821751,\n \"stddev_ts\": 0.004676,\n \"samples_ns\": [ 280219420270, 281509980836, 281419100835 ],\n \"samples_ts\": [ 1.82714, 1.81876, 1.81935 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T13:07:25Z", + "avg_ns": 43022002638, + "stddev_ns": 5667767, + "avg_ts": 2.975222, + "stddev_ts": 0.000392, + "samples_ns": [ + 43017096216, + 43020705001, + 43028206697 + ], + "samples_ts": [ + 2.97556, + 2.97531, + 2.97479 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-14T13:10:18Z", + "avg_ns": 281049500647, + "stddev_ns": 3121251656, + "avg_ts": 1.821751, + "stddev_ts": 0.004676, + "samples_ns": [ + 280219420270, + 281509980836, + 281419100835 + ], + "samples_ts": [ + 1.82714, + 1.81876, + 1.81935 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1409 + }, + { + "timestamp_utc": "2025-12-14T13:39:24.007542+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T13:24:23Z\",\n \"avg_ns\": 173022288825,\n \"stddev_ns\": 19871589,\n \"avg_ts\": 2.959156,\n \"stddev_ts\": 0.000340,\n \"samples_ns\": [ 173042950511, 173003315000, 173020600964 ],\n \"samples_ts\": [ 2.9588, 2.95948, 2.95919 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T13:35:55Z\",\n \"avg_ns\": 69364412357,\n \"stddev_ns\": 252618351,\n \"avg_ts\": 1.845343,\n \"stddev_ts\": 0.006735,\n \"samples_ns\": [ 69072714442, 69509735550, 69510787079 ],\n \"samples_ts\": [ 1.85312, 1.84147, 1.84144 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T13:24:23Z", + "avg_ns": 173022288825, + "stddev_ns": 19871589, + "avg_ts": 2.959156, + "stddev_ts": 0.00034, + "samples_ns": [ + 173042950511, + 173003315000, + 173020600964 + ], + "samples_ts": [ + 2.9588, + 2.95948, + 2.95919 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-14T13:35:55Z", + "avg_ns": 69364412357, + "stddev_ns": 252618351, + "avg_ts": 1.845343, + "stddev_ts": 0.006735, + "samples_ns": [ + 69072714442, + 69509735550, + 69510787079 + ], + "samples_ts": [ + 1.85312, + 1.84147, + 1.84144 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1410 + }, + { + "timestamp_utc": "2025-12-14T14:05:03.714798+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T13:39:25Z\",\n \"avg_ns\": 173003453379,\n \"stddev_ns\": 24161003,\n \"avg_ts\": 2.959479,\n \"stddev_ts\": 0.000413,\n \"samples_ns\": [ 172977191345, 173008451917, 173024716877 ],\n \"samples_ts\": [ 2.95993, 2.95939, 2.95911 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T13:50:57Z\",\n \"avg_ns\": 281914588972,\n \"stddev_ns\": 72551000,\n \"avg_ts\": 1.816153,\n \"stddev_ts\": 0.000467,\n \"samples_ns\": [ 281901059947, 281849754799, 281992952170 ],\n \"samples_ts\": [ 1.81624, 1.81657, 1.81565 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T13:39:25Z", + "avg_ns": 173003453379, + "stddev_ns": 24161003, + "avg_ts": 2.959479, + "stddev_ts": 0.000413, + "samples_ns": [ + 172977191345, + 173008451917, + 173024716877 + ], + "samples_ts": [ + 2.95993, + 2.95939, + 2.95911 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-14T13:50:57Z", + "avg_ns": 281914588972, + "stddev_ns": 72551000, + "avg_ts": 1.816153, + "stddev_ts": 0.000467, + "samples_ns": [ + 281901059947, + 281849754799, + 281992952170 + ], + "samples_ts": [ + 1.81624, + 1.81657, + 1.81565 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 512, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1411 + }, + { + "timestamp_utc": "2025-12-14T14:11:26.074459+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T14:05:04Z\",\n \"avg_ns\": 43016346330,\n \"stddev_ns\": 6471825,\n \"avg_ts\": 2.975613,\n \"stddev_ts\": 0.000448,\n \"samples_ns\": [ 43022392000, 43009519296, 43017127694 ],\n \"samples_ts\": [ 2.97519, 2.97609, 2.97556 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T14:07:57Z\",\n \"avg_ns\": 69445493644,\n \"stddev_ns\": 185478138,\n \"avg_ts\": 1.843181,\n \"stddev_ts\": 0.004930,\n \"samples_ns\": [ 69233670483, 69578797761, 69524012688 ],\n \"samples_ts\": [ 1.84881, 1.83964, 1.84109 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T14:05:04Z", + "avg_ns": 43016346330, + "stddev_ns": 6471825, + "avg_ts": 2.975613, + "stddev_ts": 0.000448, + "samples_ns": [ + 43022392000, + 43009519296, + 43017127694 + ], + "samples_ts": [ + 2.97519, + 2.97609, + 2.97556 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-14T14:07:57Z", + "avg_ns": 69445493644, + "stddev_ns": 185478138, + "avg_ts": 1.843181, + "stddev_ts": 0.00493, + "samples_ns": [ + 69233670483, + 69578797761, + 69524012688 + ], + "samples_ts": [ + 1.84881, + 1.83964, + 1.84109 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1412 + }, + { + "timestamp_utc": "2025-12-14T14:28:24.980753+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T14:11:27Z\",\n \"avg_ns\": 43019824853,\n \"stddev_ns\": 7472925,\n \"avg_ts\": 2.975372,\n \"stddev_ts\": 0.000517,\n \"samples_ns\": [ 43027428940, 43012496482, 43019549138 ],\n \"samples_ts\": [ 2.97485, 2.97588, 2.97539 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T14:14:19Z\",\n \"avg_ns\": 281614302513,\n \"stddev_ns\": 702395411,\n \"avg_ts\": 1.818097,\n \"stddev_ts\": 0.004541,\n \"samples_ns\": [ 280807321259, 282088113652, 281947472629 ],\n \"samples_ts\": [ 1.82331, 1.81504, 1.81594 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T14:11:27Z", + "avg_ns": 43019824853, + "stddev_ns": 7472925, + "avg_ts": 2.975372, + "stddev_ts": 0.000517, + "samples_ns": [ + 43027428940, + 43012496482, + 43019549138 + ], + "samples_ts": [ + 2.97485, + 2.97588, + 2.97539 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-14T14:14:19Z", + "avg_ns": 281614302513, + "stddev_ns": 702395411, + "avg_ts": 1.818097, + "stddev_ts": 0.004541, + "samples_ns": [ + 280807321259, + 282088113652, + 281947472629 + ], + "samples_ts": [ + 1.82331, + 1.81504, + 1.81594 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1413 + }, + { + "timestamp_utc": "2025-12-14T14:43:30.872295+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T14:28:26Z\",\n \"avg_ns\": 173892861109,\n \"stddev_ns\": 7892556,\n \"avg_ts\": 2.944342,\n \"stddev_ts\": 0.000133,\n \"samples_ns\": [ 173899056285, 173884005092, 173895521952 ],\n \"samples_ts\": [ 2.94424, 2.94449, 2.9443 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T14:40:01Z\",\n \"avg_ns\": 69445336563,\n \"stddev_ns\": 182240676,\n \"avg_ts\": 1.843185,\n \"stddev_ts\": 0.004844,\n \"samples_ns\": [ 69234930273, 69547626068, 69553453349 ],\n \"samples_ts\": [ 1.84878, 1.84047, 1.84031 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T14:28:26Z", + "avg_ns": 173892861109, + "stddev_ns": 7892556, + "avg_ts": 2.944342, + "stddev_ts": 0.000133, + "samples_ns": [ + 173899056285, + 173884005092, + 173895521952 + ], + "samples_ts": [ + 2.94424, + 2.94449, + 2.9443 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-14T14:40:01Z", + "avg_ns": 69445336563, + "stddev_ns": 182240676, + "avg_ts": 1.843185, + "stddev_ts": 0.004844, + "samples_ns": [ + 69234930273, + 69547626068, + 69553453349 + ], + "samples_ts": [ + 1.84878, + 1.84047, + 1.84031 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1414 + }, + { + "timestamp_utc": "2025-12-14T15:09:08.377401+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "512", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T14:43:32Z\",\n \"avg_ns\": 173871589420,\n \"stddev_ns\": 8646544,\n \"avg_ts\": 2.944702,\n \"stddev_ts\": 0.000146,\n \"samples_ns\": [ 173862316277, 173873021409, 173879430574 ],\n \"samples_ts\": [ 2.94486, 2.94468, 2.94457 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 512,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T14:55:07Z\",\n \"avg_ns\": 280013328585,\n \"stddev_ns\": 3950323435,\n \"avg_ts\": 1.828584,\n \"stddev_ts\": 0.016583,\n \"samples_ns\": [ 281447833125, 281495696844, 277096455787 ],\n \"samples_ts\": [ 1.81916, 1.81886, 1.84773 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T14:43:32Z", + "avg_ns": 173871589420, + "stddev_ns": 8646544, + "avg_ts": 2.944702, + "stddev_ts": 0.000146, + "samples_ns": [ + 173862316277, + 173873021409, + 173879430574 + ], + "samples_ts": [ + 2.94486, + 2.94468, + 2.94457 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 512, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-14T14:55:07Z", + "avg_ns": 280013328585, + "stddev_ns": 3950323435, + "avg_ts": 1.828584, + "stddev_ts": 0.016583, + "samples_ns": [ + 281447833125, + 281495696844, + 277096455787 + ], + "samples_ts": [ + 1.81916, + 1.81886, + 1.84773 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 512, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1415 + }, + { + "timestamp_utc": "2025-12-14T15:15:30.311954+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T15:09:09Z\",\n \"avg_ns\": 43014313938,\n \"stddev_ns\": 10929938,\n \"avg_ts\": 2.975754,\n \"stddev_ts\": 0.000756,\n \"samples_ns\": [ 43022555057, 43018471454, 43001915303 ],\n \"samples_ts\": [ 2.97518, 2.97547, 2.97661 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T15:12:01Z\",\n \"avg_ns\": 69303662720,\n \"stddev_ns\": 284406908,\n \"avg_ts\": 1.846965,\n \"stddev_ts\": 0.007598,\n \"samples_ns\": [ 68975261448, 69469129979, 69466596735 ],\n \"samples_ts\": [ 1.85574, 1.84255, 1.84261 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T15:09:09Z", + "avg_ns": 43014313938, + "stddev_ns": 10929938, + "avg_ts": 2.975754, + "stddev_ts": 0.000756, + "samples_ns": [ + 43022555057, + 43018471454, + 43001915303 + ], + "samples_ts": [ + 2.97518, + 2.97547, + 2.97661 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-14T15:12:01Z", + "avg_ns": 69303662720, + "stddev_ns": 284406908, + "avg_ts": 1.846965, + "stddev_ts": 0.007598, + "samples_ns": [ + 68975261448, + 69469129979, + 69466596735 + ], + "samples_ts": [ + 1.85574, + 1.84255, + 1.84261 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1416 + }, + { + "timestamp_utc": "2025-12-14T15:32:27.252375+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T15:15:31Z\",\n \"avg_ns\": 43034353794,\n \"stddev_ns\": 5416943,\n \"avg_ts\": 2.974368,\n \"stddev_ts\": 0.000374,\n \"samples_ns\": [ 43028288757, 43036077982, 43038694644 ],\n \"samples_ts\": [ 2.97479, 2.97425, 2.97407 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T15:18:23Z\",\n \"avg_ns\": 280946536035,\n \"stddev_ns\": 752205194,\n \"avg_ts\": 1.822420,\n \"stddev_ts\": 0.004887,\n \"samples_ns\": [ 281329045325, 280079946187, 281430616593 ],\n \"samples_ts\": [ 1.81993, 1.82805, 1.81928 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T15:15:31Z", + "avg_ns": 43034353794, + "stddev_ns": 5416943, + "avg_ts": 2.974368, + "stddev_ts": 0.000374, + "samples_ns": [ + 43028288757, + 43036077982, + 43038694644 + ], + "samples_ts": [ + 2.97479, + 2.97425, + 2.97407 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-14T15:18:23Z", + "avg_ns": 280946536035, + "stddev_ns": 752205194, + "avg_ts": 1.82242, + "stddev_ts": 0.004887, + "samples_ns": [ + 281329045325, + 280079946187, + 281430616593 + ], + "samples_ts": [ + 1.81993, + 1.82805, + 1.81928 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1417 + }, + { + "timestamp_utc": "2025-12-14T15:47:27.014290+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T15:32:28Z\",\n \"avg_ns\": 172596303630,\n \"stddev_ns\": 25959598,\n \"avg_ts\": 2.966460,\n \"stddev_ts\": 0.000446,\n \"samples_ns\": [ 172620041622, 172568588217, 172600281052 ],\n \"samples_ts\": [ 2.96605, 2.96694, 2.96639 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T15:43:58Z\",\n \"avg_ns\": 69124055789,\n \"stddev_ns\": 510076555,\n \"avg_ts\": 1.851811,\n \"stddev_ts\": 0.013722,\n \"samples_ns\": [ 68536366275, 69451720645, 69384080448 ],\n \"samples_ts\": [ 1.86762, 1.84301, 1.8448 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T15:32:28Z", + "avg_ns": 172596303630, + "stddev_ns": 25959598, + "avg_ts": 2.96646, + "stddev_ts": 0.000446, + "samples_ns": [ + 172620041622, + 172568588217, + 172600281052 + ], + "samples_ts": [ + 2.96605, + 2.96694, + 2.96639 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-14T15:43:58Z", + "avg_ns": 69124055789, + "stddev_ns": 510076555, + "avg_ts": 1.851811, + "stddev_ts": 0.013722, + "samples_ns": [ + 68536366275, + 69451720645, + 69384080448 + ], + "samples_ts": [ + 1.86762, + 1.84301, + 1.8448 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1418 + }, + { + "timestamp_utc": "2025-12-14T16:13:04.157264+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T15:47:28Z\",\n \"avg_ns\": 172548527994,\n \"stddev_ns\": 26302787,\n \"avg_ts\": 2.967281,\n \"stddev_ts\": 0.000452,\n \"samples_ns\": [ 172535174400, 172531588004, 172578821580 ],\n \"samples_ts\": [ 2.96751, 2.96757, 2.96676 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T15:58:58Z\",\n \"avg_ns\": 281626143894,\n \"stddev_ns\": 238034164,\n \"avg_ts\": 1.818014,\n \"stddev_ts\": 0.001536,\n \"samples_ns\": [ 281418221386, 281574424589, 281885785707 ],\n \"samples_ts\": [ 1.81936, 1.81835, 1.81634 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T15:47:28Z", + "avg_ns": 172548527994, + "stddev_ns": 26302787, + "avg_ts": 2.967281, + "stddev_ts": 0.000452, + "samples_ns": [ + 172535174400, + 172531588004, + 172578821580 + ], + "samples_ts": [ + 2.96751, + 2.96757, + 2.96676 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-14T15:58:58Z", + "avg_ns": 281626143894, + "stddev_ns": 238034164, + "avg_ts": 1.818014, + "stddev_ts": 0.001536, + "samples_ns": [ + 281418221386, + 281574424589, + 281885785707 + ], + "samples_ts": [ + 1.81936, + 1.81835, + 1.81634 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1419 + }, + { + "timestamp_utc": "2025-12-14T16:19:26.713625+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T16:13:05Z\",\n \"avg_ns\": 43028082545,\n \"stddev_ns\": 5458745,\n \"avg_ts\": 2.974801,\n \"stddev_ts\": 0.000377,\n \"samples_ns\": [ 43027087322, 43033966438, 43023193876 ],\n \"samples_ts\": [ 2.97487, 2.97439, 2.97514 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T16:15:57Z\",\n \"avg_ns\": 69488701787,\n \"stddev_ns\": 36720464,\n \"avg_ts\": 1.842026,\n \"stddev_ts\": 0.000974,\n \"samples_ns\": [ 69449068577, 69495471191, 69521565594 ],\n \"samples_ts\": [ 1.84308, 1.84185, 1.84116 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T16:13:05Z", + "avg_ns": 43028082545, + "stddev_ns": 5458745, + "avg_ts": 2.974801, + "stddev_ts": 0.000377, + "samples_ns": [ + 43027087322, + 43033966438, + 43023193876 + ], + "samples_ts": [ + 2.97487, + 2.97439, + 2.97514 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-14T16:15:57Z", + "avg_ns": 69488701787, + "stddev_ns": 36720464, + "avg_ts": 1.842026, + "stddev_ts": 0.000974, + "samples_ns": [ + 69449068577, + 69495471191, + 69521565594 + ], + "samples_ts": [ + 1.84308, + 1.84185, + 1.84116 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1420 + }, + { + "timestamp_utc": "2025-12-14T16:36:24.695952+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T16:19:27Z\",\n \"avg_ns\": 43025487798,\n \"stddev_ns\": 7278431,\n \"avg_ts\": 2.974981,\n \"stddev_ts\": 0.000503,\n \"samples_ns\": [ 43033436492, 43023877612, 43019149290 ],\n \"samples_ts\": [ 2.97443, 2.97509, 2.97542 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T16:22:20Z\",\n \"avg_ns\": 281301883171,\n \"stddev_ns\": 349714730,\n \"avg_ts\": 1.820111,\n \"stddev_ts\": 0.002264,\n \"samples_ns\": [ 280898070995, 281505169775, 281502408744 ],\n \"samples_ts\": [ 1.82273, 1.81879, 1.81881 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T16:19:27Z", + "avg_ns": 43025487798, + "stddev_ns": 7278431, + "avg_ts": 2.974981, + "stddev_ts": 0.000503, + "samples_ns": [ + 43033436492, + 43023877612, + 43019149290 + ], + "samples_ts": [ + 2.97443, + 2.97509, + 2.97542 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-14T16:22:20Z", + "avg_ns": 281301883171, + "stddev_ns": 349714730, + "avg_ts": 1.820111, + "stddev_ts": 0.002264, + "samples_ns": [ + 280898070995, + 281505169775, + 281502408744 + ], + "samples_ts": [ + 1.82273, + 1.81879, + 1.81881 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1421 + }, + { + "timestamp_utc": "2025-12-14T16:51:27.799789+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T16:36:25Z\",\n \"avg_ns\": 173009486359,\n \"stddev_ns\": 5214657,\n \"avg_ts\": 2.959375,\n \"stddev_ts\": 0.000089,\n \"samples_ns\": [ 173011367536, 173013481916, 173003609626 ],\n \"samples_ts\": [ 2.95934, 2.95931, 2.95948 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T16:47:58Z\",\n \"avg_ns\": 69695357260,\n \"stddev_ns\": 293044311,\n \"avg_ts\": 1.836586,\n \"stddev_ts\": 0.007704,\n \"samples_ns\": [ 70033177711, 69509627227, 69543266843 ],\n \"samples_ts\": [ 1.82771, 1.84147, 1.84058 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T16:36:25Z", + "avg_ns": 173009486359, + "stddev_ns": 5214657, + "avg_ts": 2.959375, + "stddev_ts": 8.9e-05, + "samples_ns": [ + 173011367536, + 173013481916, + 173003609626 + ], + "samples_ts": [ + 2.95934, + 2.95931, + 2.95948 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-14T16:47:58Z", + "avg_ns": 69695357260, + "stddev_ns": 293044311, + "avg_ts": 1.836586, + "stddev_ts": 0.007704, + "samples_ns": [ + 70033177711, + 69509627227, + 69543266843 + ], + "samples_ts": [ + 1.82771, + 1.84147, + 1.84058 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1422 + }, + { + "timestamp_utc": "2025-12-14T17:17:05.801672+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T16:51:29Z\",\n \"avg_ns\": 172998261815,\n \"stddev_ns\": 6033422,\n \"avg_ts\": 2.959567,\n \"stddev_ts\": 0.000103,\n \"samples_ns\": [ 172993922557, 172995748037, 173005114853 ],\n \"samples_ts\": [ 2.95964, 2.95961, 2.95945 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T17:03:01Z\",\n \"avg_ns\": 281346757824,\n \"stddev_ns\": 77107202,\n \"avg_ts\": 1.819818,\n \"stddev_ts\": 0.000499,\n \"samples_ns\": [ 281260693306, 281409529182, 281370050986 ],\n \"samples_ts\": [ 1.82038, 1.81941, 1.81967 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T16:51:29Z", + "avg_ns": 172998261815, + "stddev_ns": 6033422, + "avg_ts": 2.959567, + "stddev_ts": 0.000103, + "samples_ns": [ + 172993922557, + 172995748037, + 173005114853 + ], + "samples_ts": [ + 2.95964, + 2.95961, + 2.95945 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-14T17:03:01Z", + "avg_ns": 281346757824, + "stddev_ns": 77107202, + "avg_ts": 1.819818, + "stddev_ts": 0.000499, + "samples_ns": [ + 281260693306, + 281409529182, + 281370050986 + ], + "samples_ts": [ + 1.82038, + 1.81941, + 1.81967 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1423 + }, + { + "timestamp_utc": "2025-12-14T17:23:28.482951+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T17:17:07Z\",\n \"avg_ns\": 43025780336,\n \"stddev_ns\": 4350834,\n \"avg_ts\": 2.974961,\n \"stddev_ts\": 0.000301,\n \"samples_ns\": [ 43023373342, 43030802805, 43023164861 ],\n \"samples_ts\": [ 2.97513, 2.97461, 2.97514 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T17:19:59Z\",\n \"avg_ns\": 69536699245,\n \"stddev_ns\": 43580755,\n \"avg_ts\": 1.840755,\n \"stddev_ts\": 0.001154,\n \"samples_ns\": [ 69486450954, 69559483505, 69564163278 ],\n \"samples_ts\": [ 1.84209, 1.84015, 1.84003 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T17:17:07Z", + "avg_ns": 43025780336, + "stddev_ns": 4350834, + "avg_ts": 2.974961, + "stddev_ts": 0.000301, + "samples_ns": [ + 43023373342, + 43030802805, + 43023164861 + ], + "samples_ts": [ + 2.97513, + 2.97461, + 2.97514 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-14T17:19:59Z", + "avg_ns": 69536699245, + "stddev_ns": 43580755, + "avg_ts": 1.840755, + "stddev_ts": 0.001154, + "samples_ns": [ + 69486450954, + 69559483505, + 69564163278 + ], + "samples_ts": [ + 1.84209, + 1.84015, + 1.84003 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1424 + }, + { + "timestamp_utc": "2025-12-14T17:40:26.356285+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T17:23:29Z\",\n \"avg_ns\": 43016991103,\n \"stddev_ns\": 2744313,\n \"avg_ts\": 2.975568,\n \"stddev_ts\": 0.000190,\n \"samples_ns\": [ 43014988864, 43020119318, 43015865127 ],\n \"samples_ts\": [ 2.97571, 2.97535, 2.97565 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T17:26:21Z\",\n \"avg_ns\": 281273087729,\n \"stddev_ns\": 324452585,\n \"avg_ts\": 1.820297,\n \"stddev_ts\": 0.002101,\n \"samples_ns\": [ 280904920766, 281517248625, 281397093796 ],\n \"samples_ts\": [ 1.82268, 1.81872, 1.81949 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T17:23:29Z", + "avg_ns": 43016991103, + "stddev_ns": 2744313, + "avg_ts": 2.975568, + "stddev_ts": 0.00019, + "samples_ns": [ + 43014988864, + 43020119318, + 43015865127 + ], + "samples_ts": [ + 2.97571, + 2.97535, + 2.97565 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-14T17:26:21Z", + "avg_ns": 281273087729, + "stddev_ns": 324452585, + "avg_ts": 1.820297, + "stddev_ts": 0.002101, + "samples_ns": [ + 280904920766, + 281517248625, + 281397093796 + ], + "samples_ts": [ + 1.82268, + 1.81872, + 1.81949 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1425 + }, + { + "timestamp_utc": "2025-12-14T17:55:32.663584+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T17:40:27Z\",\n \"avg_ns\": 173922037528,\n \"stddev_ns\": 42306682,\n \"avg_ts\": 2.943848,\n \"stddev_ts\": 0.000716,\n \"samples_ns\": [ 173873188174, 173946863237, 173946061173 ],\n \"samples_ts\": [ 2.94467, 2.94343, 2.94344 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T17:52:03Z\",\n \"avg_ns\": 69556953017,\n \"stddev_ns\": 83929688,\n \"avg_ts\": 1.840220,\n \"stddev_ts\": 0.002222,\n \"samples_ns\": [ 69460039897, 69605317938, 69605501217 ],\n \"samples_ts\": [ 1.84279, 1.83894, 1.83894 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T17:40:27Z", + "avg_ns": 173922037528, + "stddev_ns": 42306682, + "avg_ts": 2.943848, + "stddev_ts": 0.000716, + "samples_ns": [ + 173873188174, + 173946863237, + 173946061173 + ], + "samples_ts": [ + 2.94467, + 2.94343, + 2.94344 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-14T17:52:03Z", + "avg_ns": 69556953017, + "stddev_ns": 83929688, + "avg_ts": 1.84022, + "stddev_ts": 0.002222, + "samples_ns": [ + 69460039897, + 69605317938, + 69605501217 + ], + "samples_ts": [ + 1.84279, + 1.83894, + 1.83894 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1426 + }, + { + "timestamp_utc": "2025-12-14T18:21:15.003312+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "1024", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T17:55:33Z\",\n \"avg_ns\": 173961452828,\n \"stddev_ns\": 8633254,\n \"avg_ts\": 2.943181,\n \"stddev_ts\": 0.000146,\n \"samples_ns\": [ 173955036815, 173958053326, 173971268343 ],\n \"samples_ts\": [ 2.94329, 2.94324, 2.94301 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 1024,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T18:07:09Z\",\n \"avg_ns\": 281493039118,\n \"stddev_ns\": 153790575,\n \"avg_ts\": 1.818873,\n \"stddev_ts\": 0.000994,\n \"samples_ns\": [ 281317245918, 281559161780, 281602709656 ],\n \"samples_ts\": [ 1.82001, 1.81845, 1.81816 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T17:55:33Z", + "avg_ns": 173961452828, + "stddev_ns": 8633254, + "avg_ts": 2.943181, + "stddev_ts": 0.000146, + "samples_ns": [ + 173955036815, + 173958053326, + 173971268343 + ], + "samples_ts": [ + 2.94329, + 2.94324, + 2.94301 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 1024, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-14T18:07:09Z", + "avg_ns": 281493039118, + "stddev_ns": 153790575, + "avg_ts": 1.818873, + "stddev_ts": 0.000994, + "samples_ns": [ + 281317245918, + 281559161780, + 281602709656 + ], + "samples_ts": [ + 1.82001, + 1.81845, + 1.81816 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 1024, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1427 + }, + { + "timestamp_utc": "2025-12-14T18:27:37.866307+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T18:21:16Z\",\n \"avg_ns\": 43036786548,\n \"stddev_ns\": 12665538,\n \"avg_ts\": 2.974200,\n \"stddev_ts\": 0.000875,\n \"samples_ns\": [ 43045961863, 43042059572, 43022338210 ],\n \"samples_ts\": [ 2.97357, 2.97384, 2.9752 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T18:24:08Z\",\n \"avg_ns\": 69588553300,\n \"stddev_ns\": 90738613,\n \"avg_ts\": 1.839385,\n \"stddev_ts\": 0.002400,\n \"samples_ns\": [ 69483851872, 69637503301, 69644304729 ],\n \"samples_ts\": [ 1.84215, 1.83809, 1.83791 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T18:21:16Z", + "avg_ns": 43036786548, + "stddev_ns": 12665538, + "avg_ts": 2.9742, + "stddev_ts": 0.000875, + "samples_ns": [ + 43045961863, + 43042059572, + 43022338210 + ], + "samples_ts": [ + 2.97357, + 2.97384, + 2.9752 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-14T18:24:08Z", + "avg_ns": 69588553300, + "stddev_ns": 90738613, + "avg_ts": 1.839385, + "stddev_ts": 0.0024, + "samples_ns": [ + 69483851872, + 69637503301, + 69644304729 + ], + "samples_ts": [ + 1.84215, + 1.83809, + 1.83791 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1428 + }, + { + "timestamp_utc": "2025-12-14T18:44:34.807625+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T18:27:39Z\",\n \"avg_ns\": 43030881310,\n \"stddev_ns\": 4773789,\n \"avg_ts\": 2.974608,\n \"stddev_ts\": 0.000330,\n \"samples_ns\": [ 43030978451, 43035605788, 43026059691 ],\n \"samples_ts\": [ 2.9746, 2.97428, 2.97494 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T18:30:31Z\",\n \"avg_ns\": 280954900498,\n \"stddev_ns\": 1100468093,\n \"avg_ts\": 1.822375,\n \"stddev_ts\": 0.007154,\n \"samples_ns\": [ 279685641516, 281536940176, 281642119804 ],\n \"samples_ts\": [ 1.83063, 1.81859, 1.81791 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T18:27:39Z", + "avg_ns": 43030881310, + "stddev_ns": 4773789, + "avg_ts": 2.974608, + "stddev_ts": 0.00033, + "samples_ns": [ + 43030978451, + 43035605788, + 43026059691 + ], + "samples_ts": [ + 2.9746, + 2.97428, + 2.97494 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-14T18:30:31Z", + "avg_ns": 280954900498, + "stddev_ns": 1100468093, + "avg_ts": 1.822375, + "stddev_ts": 0.007154, + "samples_ns": [ + 279685641516, + 281536940176, + 281642119804 + ], + "samples_ts": [ + 1.83063, + 1.81859, + 1.81791 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1429 + }, + { + "timestamp_utc": "2025-12-14T18:59:35.431539+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T18:44:36Z\",\n \"avg_ns\": 172586556361,\n \"stddev_ns\": 8078185,\n \"avg_ts\": 2.966627,\n \"stddev_ts\": 0.000139,\n \"samples_ns\": [ 172577298158, 172590291733, 172592079193 ],\n \"samples_ts\": [ 2.96679, 2.96656, 2.96653 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T18:56:06Z\",\n \"avg_ns\": 69440651994,\n \"stddev_ns\": 14006777,\n \"avg_ts\": 1.843301,\n \"stddev_ts\": 0.000372,\n \"samples_ns\": [ 69426611101, 69454614409, 69440730474 ],\n \"samples_ts\": [ 1.84367, 1.84293, 1.8433 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T18:44:36Z", + "avg_ns": 172586556361, + "stddev_ns": 8078185, + "avg_ts": 2.966627, + "stddev_ts": 0.000139, + "samples_ns": [ + 172577298158, + 172590291733, + 172592079193 + ], + "samples_ts": [ + 2.96679, + 2.96656, + 2.96653 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-14T18:56:06Z", + "avg_ns": 69440651994, + "stddev_ns": 14006777, + "avg_ts": 1.843301, + "stddev_ts": 0.000372, + "samples_ns": [ + 69426611101, + 69454614409, + 69440730474 + ], + "samples_ts": [ + 1.84367, + 1.84293, + 1.8433 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1430 + }, + { + "timestamp_utc": "2025-12-14T19:25:13.056788+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "128", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T18:59:36Z\",\n \"avg_ns\": 172650758755,\n \"stddev_ns\": 9942946,\n \"avg_ts\": 2.965524,\n \"stddev_ts\": 0.000170,\n \"samples_ns\": [ 172656813127, 172656159222, 172639303918 ],\n \"samples_ts\": [ 2.96542, 2.96543, 2.96572 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 128,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T19:11:07Z\",\n \"avg_ns\": 281670650921,\n \"stddev_ns\": 72882510,\n \"avg_ts\": 1.817726,\n \"stddev_ts\": 0.000470,\n \"samples_ns\": [ 281734945338, 281591479787, 281685527639 ],\n \"samples_ts\": [ 1.81731, 1.81824, 1.81763 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T18:59:36Z", + "avg_ns": 172650758755, + "stddev_ns": 9942946, + "avg_ts": 2.965524, + "stddev_ts": 0.00017, + "samples_ns": [ + 172656813127, + 172656159222, + 172639303918 + ], + "samples_ts": [ + 2.96542, + 2.96543, + 2.96572 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 128, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-14T19:11:07Z", + "avg_ns": 281670650921, + "stddev_ns": 72882510, + "avg_ts": 1.817726, + "stddev_ts": 0.00047, + "samples_ns": [ + 281734945338, + 281591479787, + 281685527639 + ], + "samples_ts": [ + 1.81731, + 1.81824, + 1.81763 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 128, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1431 + }, + { + "timestamp_utc": "2025-12-14T19:31:35.090569+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T19:25:14Z\",\n \"avg_ns\": 43028685834,\n \"stddev_ns\": 8954451,\n \"avg_ts\": 2.974760,\n \"stddev_ts\": 0.000619,\n \"samples_ns\": [ 43020514323, 43027292910, 43038250271 ],\n \"samples_ts\": [ 2.97532, 2.97486, 2.9741 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T19:28:06Z\",\n \"avg_ns\": 69311099973,\n \"stddev_ns\": 250003497,\n \"avg_ts\": 1.846762,\n \"stddev_ts\": 0.006675,\n \"samples_ns\": [ 69022610471, 69464398203, 69446291247 ],\n \"samples_ts\": [ 1.85446, 1.84267, 1.84315 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T19:25:14Z", + "avg_ns": 43028685834, + "stddev_ns": 8954451, + "avg_ts": 2.97476, + "stddev_ts": 0.000619, + "samples_ns": [ + 43020514323, + 43027292910, + 43038250271 + ], + "samples_ts": [ + 2.97532, + 2.97486, + 2.9741 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-14T19:28:06Z", + "avg_ns": 69311099973, + "stddev_ns": 250003497, + "avg_ts": 1.846762, + "stddev_ts": 0.006675, + "samples_ns": [ + 69022610471, + 69464398203, + 69446291247 + ], + "samples_ts": [ + 1.85446, + 1.84267, + 1.84315 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1432 + }, + { + "timestamp_utc": "2025-12-14T19:48:33.338531+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T19:31:36Z\",\n \"avg_ns\": 43025541099,\n \"stddev_ns\": 6347589,\n \"avg_ts\": 2.974977,\n \"stddev_ts\": 0.000438,\n \"samples_ns\": [ 43027575467, 43030615045, 43018432787 ],\n \"samples_ts\": [ 2.97484, 2.97463, 2.97547 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T19:34:28Z\",\n \"avg_ns\": 281374070034,\n \"stddev_ns\": 417207759,\n \"avg_ts\": 1.819644,\n \"stddev_ts\": 0.002700,\n \"samples_ns\": [ 280892864249, 281594854319, 281634491535 ],\n \"samples_ts\": [ 1.82276, 1.81822, 1.81796 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T19:31:36Z", + "avg_ns": 43025541099, + "stddev_ns": 6347589, + "avg_ts": 2.974977, + "stddev_ts": 0.000438, + "samples_ns": [ + 43027575467, + 43030615045, + 43018432787 + ], + "samples_ts": [ + 2.97484, + 2.97463, + 2.97547 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-14T19:34:28Z", + "avg_ns": 281374070034, + "stddev_ns": 417207759, + "avg_ts": 1.819644, + "stddev_ts": 0.0027, + "samples_ns": [ + 280892864249, + 281594854319, + 281634491535 + ], + "samples_ts": [ + 1.82276, + 1.81822, + 1.81796 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1433 + }, + { + "timestamp_utc": "2025-12-14T20:03:35.750691+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T19:48:34Z\",\n \"avg_ns\": 172998923224,\n \"stddev_ns\": 6203552,\n \"avg_ts\": 2.959556,\n \"stddev_ts\": 0.000106,\n \"samples_ns\": [ 172998870733, 172992774035, 173005124906 ],\n \"samples_ts\": [ 2.95956, 2.95966, 2.95945 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T20:00:06Z\",\n \"avg_ns\": 69471240153,\n \"stddev_ns\": 73774985,\n \"avg_ts\": 1.842490,\n \"stddev_ts\": 0.001957,\n \"samples_ns\": [ 69395586091, 69542978195, 69475156175 ],\n \"samples_ts\": [ 1.8445, 1.84059, 1.84239 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T19:48:34Z", + "avg_ns": 172998923224, + "stddev_ns": 6203552, + "avg_ts": 2.959556, + "stddev_ts": 0.000106, + "samples_ns": [ + 172998870733, + 172992774035, + 173005124906 + ], + "samples_ts": [ + 2.95956, + 2.95966, + 2.95945 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-14T20:00:06Z", + "avg_ns": 69471240153, + "stddev_ns": 73774985, + "avg_ts": 1.84249, + "stddev_ts": 0.001957, + "samples_ns": [ + 69395586091, + 69542978195, + 69475156175 + ], + "samples_ts": [ + 1.8445, + 1.84059, + 1.84239 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1434 + }, + { + "timestamp_utc": "2025-12-14T20:29:14.042027+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "256", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T20:03:37Z\",\n \"avg_ns\": 173016997341,\n \"stddev_ns\": 24933493,\n \"avg_ts\": 2.959247,\n \"stddev_ts\": 0.000426,\n \"samples_ns\": [ 173006088361, 172999377501, 173045526161 ],\n \"samples_ts\": [ 2.95943, 2.95955, 2.95876 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 256,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T20:15:09Z\",\n \"avg_ns\": 281409577721,\n \"stddev_ns\": 70250480,\n \"avg_ts\": 1.819412,\n \"stddev_ts\": 0.000454,\n \"samples_ns\": [ 281349563812, 281392326713, 281486842640 ],\n \"samples_ts\": [ 1.8198, 1.81952, 1.81891 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T20:03:37Z", + "avg_ns": 173016997341, + "stddev_ns": 24933493, + "avg_ts": 2.959247, + "stddev_ts": 0.000426, + "samples_ns": [ + 173006088361, + 172999377501, + 173045526161 + ], + "samples_ts": [ + 2.95943, + 2.95955, + 2.95876 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 256, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-14T20:15:09Z", + "avg_ns": 281409577721, + "stddev_ns": 70250480, + "avg_ts": 1.819412, + "stddev_ts": 0.000454, + "samples_ns": [ + 281349563812, + 281392326713, + 281486842640 + ], + "samples_ts": [ + 1.8198, + 1.81952, + 1.81891 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 256, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1435 + }, + { + "timestamp_utc": "2025-12-14T20:35:35.030293+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T20:29:15Z\",\n \"avg_ns\": 43055073094,\n \"stddev_ns\": 60515675,\n \"avg_ts\": 2.972940,\n \"stddev_ts\": 0.004175,\n \"samples_ns\": [ 43124790530, 43024307823, 43016120929 ],\n \"samples_ts\": [ 2.96813, 2.97506, 2.97563 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T20:32:07Z\",\n \"avg_ns\": 68942703151,\n \"stddev_ns\": 3093264821,\n \"avg_ts\": 1.856704,\n \"stddev_ts\": 0.015868,\n \"samples_ns\": [ 69101490525, 69434277948, 68292340982 ],\n \"samples_ts\": [ 1.85235, 1.84347, 1.8743 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T20:29:15Z", + "avg_ns": 43055073094, + "stddev_ns": 60515675, + "avg_ts": 2.97294, + "stddev_ts": 0.004175, + "samples_ns": [ + 43124790530, + 43024307823, + 43016120929 + ], + "samples_ts": [ + 2.96813, + 2.97506, + 2.97563 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-14T20:32:07Z", + "avg_ns": 68942703151, + "stddev_ns": 3093264821, + "avg_ts": 1.856704, + "stddev_ts": 0.015868, + "samples_ns": [ + 69101490525, + 69434277948, + 68292340982 + ], + "samples_ts": [ + 1.85235, + 1.84347, + 1.8743 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1436 + }, + { + "timestamp_utc": "2025-12-14T20:52:31.194438+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "128", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 128,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T20:35:36Z\",\n \"avg_ns\": 43018131314,\n \"stddev_ns\": 10331474,\n \"avg_ts\": 2.975490,\n \"stddev_ts\": 0.000715,\n \"samples_ns\": [ 43008310210, 43028906916, 43017176816 ],\n \"samples_ts\": [ 2.97617, 2.97474, 2.97556 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T20:38:28Z\",\n \"avg_ns\": 280700059122,\n \"stddev_ns\": 1341354458,\n \"avg_ts\": 1.824039,\n \"stddev_ts\": 0.008740,\n \"samples_ns\": [ 279151207000, 281479389224, 281469581144 ],\n \"samples_ts\": [ 1.83413, 1.81896, 1.81902 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 128, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T20:35:36Z", + "avg_ns": 43018131314, + "stddev_ns": 10331474, + "avg_ts": 2.97549, + "stddev_ts": 0.000715, + "samples_ns": [ + 43008310210, + 43028906916, + 43017176816 + ], + "samples_ts": [ + 2.97617, + 2.97474, + 2.97556 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-14T20:38:28Z", + "avg_ns": 280700059122, + "stddev_ns": 1341354458, + "avg_ts": 1.824039, + "stddev_ts": 0.00874, + "samples_ns": [ + 279151207000, + 281479389224, + 281469581144 + ], + "samples_ts": [ + 1.83413, + 1.81896, + 1.81902 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 128, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1437 + }, + { + "timestamp_utc": "2025-12-14T21:07:36.409571+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "128", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T20:52:32Z\",\n \"avg_ns\": 173895856954,\n \"stddev_ns\": 16354538,\n \"avg_ts\": 2.944291,\n \"stddev_ts\": 0.000277,\n \"samples_ns\": [ 173887247163, 173885617891, 173914705810 ],\n \"samples_ts\": [ 2.94444, 2.94446, 2.94397 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 128,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T21:04:08Z\",\n \"avg_ns\": 69210838663,\n \"stddev_ns\": 489761923,\n \"avg_ts\": 1.849483,\n \"stddev_ts\": 0.013141,\n \"samples_ns\": [ 68645382905, 69485723608, 69501409477 ],\n \"samples_ts\": [ 1.86466, 1.84211, 1.84169 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T20:52:32Z", + "avg_ns": 173895856954, + "stddev_ns": 16354538, + "avg_ts": 2.944291, + "stddev_ts": 0.000277, + "samples_ns": [ + 173887247163, + 173885617891, + 173914705810 + ], + "samples_ts": [ + 2.94444, + 2.94446, + 2.94397 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 128, + "n_depth": 0, + "test_time": "2025-12-14T21:04:08Z", + "avg_ns": 69210838663, + "stddev_ns": 489761923, + "avg_ts": 1.849483, + "stddev_ts": 0.013141, + "samples_ns": [ + 68645382905, + 69485723608, + 69501409477 + ], + "samples_ts": [ + 1.86466, + 1.84211, + 1.84169 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 128, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1438 + }, + { + "timestamp_utc": "2025-12-14T21:33:16.667917+00:00", + "command": [ + "/home/thomas/sunkiss/inference/llama.cpp/build/bin/llama-bench", + "--model", + "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "--threads", + "4", + "--batch-size", + "2048", + "--ubatch-size", + "512", + "--n-prompt", + "512", + "--n-gen", + "512", + "--repetitions", + "3", + "--output", + "json" + ], + "returncode": 0, + "stdout": "[\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 512,\n \"n_gen\": 0,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T21:07:37Z\",\n \"avg_ns\": 173913718845,\n \"stddev_ns\": 13902901,\n \"avg_ts\": 2.943989,\n \"stddev_ts\": 0.000235,\n \"samples_ns\": [ 173897680271, 173921611850, 173921864416 ],\n \"samples_ts\": [ 2.94426, 2.94385, 2.94385 ]\n },\n {\n \"build_commit\": \"2fa51c19b\",\n \"build_number\": 7326,\n \"cpu_info\": \"CPU\",\n \"gpu_info\": \"\",\n \"backends\": \"CPU\",\n \"model_filename\": \"/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf\",\n \"model_type\": \"gemma3 12B Q2_K - Medium\",\n \"model_size\": 4761669504,\n \"model_n_params\": 11766034176,\n \"n_batch\": 2048,\n \"n_ubatch\": 512,\n \"n_threads\": 4,\n \"cpu_mask\": \"0x0\",\n \"cpu_strict\": false,\n \"poll\": 50,\n \"type_k\": \"f16\",\n \"type_v\": \"f16\",\n \"n_gpu_layers\": 99,\n \"n_cpu_moe\": 0,\n \"split_mode\": \"layer\",\n \"main_gpu\": 0,\n \"no_kv_offload\": false,\n \"flash_attn\": false,\n \"devices\": \"auto\",\n \"tensor_split\": \"0.00\",\n \"tensor_buft_overrides\": \"none\",\n \"use_mmap\": true,\n \"embeddings\": false,\n \"no_op_offload\": 0,\n \"no_host\": false,\n \"n_prompt\": 0,\n \"n_gen\": 512,\n \"n_depth\": 0,\n \"test_time\": \"2025-12-14T21:19:13Z\",\n \"avg_ns\": 280856241486,\n \"stddev_ns\": 646392997,\n \"avg_ts\": 1.823003,\n \"stddev_ts\": 0.004198,\n \"samples_ns\": [ 280168316017, 280949419813, 281450988628 ],\n \"samples_ts\": [ 1.82747, 1.82239, 1.81914 ]\n }\n]\n", + "stderr": "", + "parsed": [ + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 512, + "n_gen": 0, + "n_depth": 0, + "test_time": "2025-12-14T21:07:37Z", + "avg_ns": 173913718845, + "stddev_ns": 13902901, + "avg_ts": 2.943989, + "stddev_ts": 0.000235, + "samples_ns": [ + 173897680271, + 173921611850, + 173921864416 + ], + "samples_ts": [ + 2.94426, + 2.94385, + 2.94385 + ] + }, + { + "build_commit": "2fa51c19b", + "build_number": 7326, + "cpu_info": "CPU", + "gpu_info": "", + "backends": "CPU", + "model_filename": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_type": "gemma3 12B Q2_K - Medium", + "model_size": 4761669504, + "model_n_params": 11766034176, + "n_batch": 2048, + "n_ubatch": 512, + "n_threads": 4, + "cpu_mask": "0x0", + "cpu_strict": false, + "poll": 50, + "type_k": "f16", + "type_v": "f16", + "n_gpu_layers": 99, + "n_cpu_moe": 0, + "split_mode": "layer", + "main_gpu": 0, + "no_kv_offload": false, + "flash_attn": false, + "devices": "auto", + "tensor_split": "0.00", + "tensor_buft_overrides": "none", + "use_mmap": true, + "embeddings": false, + "no_op_offload": 0, + "no_host": false, + "n_prompt": 0, + "n_gen": 512, + "n_depth": 0, + "test_time": "2025-12-14T21:19:13Z", + "avg_ns": 280856241486, + "stddev_ns": 646392997, + "avg_ts": 1.823003, + "stddev_ts": 0.004198, + "samples_ns": [ + 280168316017, + 280949419813, + 281450988628 + ], + "samples_ts": [ + 1.82747, + 1.82239, + 1.81914 + ] + } + ], + "params": { + "model_path": "/home/thomas/sunkiss/inference/models-cache/unsloth__gemma-3-12B-it-GGUF/gemma-3-12b-it-Q2_K_L.gguf", + "model_repo": "unsloth/gemma-3-12B-it-GGUF", + "quantization": "Q2_K_L", + "threads": 4, + "batch_size": 2048, + "ubatch_size": 512, + "n_prompt": 512, + "n_gen": 512, + "repetitions": 3, + "numa": null, + "priority": 0, + "progress": false + }, + "run_index": 1439 + } + ], + "plots": [ + "/home/thomas/sunkiss/inference/result/throughput_vs_threads.png", + "/home/thomas/sunkiss/inference/result/throughput_vs_batch.png", + "/home/thomas/sunkiss/inference/result/latency_vs_threads.png" + ] +}